# Read the raw listings export into a data frame named `air`
air <- read.csv(file = "listings-complete.csv")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

# List the column names of the raw data
names(air)
##  [1] "id"                                          
##  [2] "listing_url"                                 
##  [3] "scrape_id"                                   
##  [4] "last_scraped"                                
##  [5] "source"                                      
##  [6] "name"                                        
##  [7] "description"                                 
##  [8] "neighborhood_overview"                       
##  [9] "picture_url"                                 
## [10] "host_id"                                     
## [11] "host_url"                                    
## [12] "host_name"                                   
## [13] "host_since"                                  
## [14] "host_location"                               
## [15] "host_about"                                  
## [16] "host_response_time"                          
## [17] "host_response_rate"                          
## [18] "host_acceptance_rate"                        
## [19] "host_is_superhost"                           
## [20] "host_thumbnail_url"                          
## [21] "host_picture_url"                            
## [22] "host_neighbourhood"                          
## [23] "host_listings_count"                         
## [24] "host_total_listings_count"                   
## [25] "host_verifications"                          
## [26] "host_has_profile_pic"                        
## [27] "host_identity_verified"                      
## [28] "neighbourhood"                               
## [29] "neighbourhood_cleansed"                      
## [30] "neighbourhood_group_cleansed"                
## [31] "latitude"                                    
## [32] "longitude"                                   
## [33] "property_type"                               
## [34] "room_type"                                   
## [35] "accommodates"                                
## [36] "bathrooms"                                   
## [37] "bathrooms_text"                              
## [38] "bedrooms"                                    
## [39] "beds"                                        
## [40] "amenities"                                   
## [41] "price"                                       
## [42] "minimum_nights"                              
## [43] "maximum_nights"                              
## [44] "minimum_minimum_nights"                      
## [45] "maximum_minimum_nights"                      
## [46] "minimum_maximum_nights"                      
## [47] "maximum_maximum_nights"                      
## [48] "minimum_nights_avg_ntm"                      
## [49] "maximum_nights_avg_ntm"                      
## [50] "calendar_updated"                            
## [51] "has_availability"                            
## [52] "availability_30"                             
## [53] "availability_60"                             
## [54] "availability_90"                             
## [55] "availability_365"                            
## [56] "calendar_last_scraped"                       
## [57] "number_of_reviews"                           
## [58] "number_of_reviews_ltm"                       
## [59] "number_of_reviews_l30d"                      
## [60] "first_review"                                
## [61] "last_review"                                 
## [62] "review_scores_rating"                        
## [63] "review_scores_accuracy"                      
## [64] "review_scores_cleanliness"                   
## [65] "review_scores_checkin"                       
## [66] "review_scores_communication"                 
## [67] "review_scores_location"                      
## [68] "review_scores_value"                         
## [69] "license"                                     
## [70] "instant_bookable"                            
## [71] "calculated_host_listings_count"              
## [72] "calculated_host_listings_count_entire_homes" 
## [73] "calculated_host_listings_count_private_rooms"
## [74] "calculated_host_listings_count_shared_rooms" 
## [75] "reviews_per_month"
# Convert character columns to factors in the dataset.
# across() + where() is the current dplyr idiom; mutate_if() is superseded.
air <- air %>% mutate(across(where(is.character), as.factor))

# Extract numbers from "bathrooms_text" and convert to numeric: every run of
# characters other than digits and "." is removed, so a value such as
# "1.5 baths" would become 1.5.
# NOTE(review): a value containing no digits at all (presumably labels like
# "Half-bath") becomes NA here and its row is removed by na.omit() below --
# confirm that dropping those listings is intended.
air$bathrooms <- as.numeric(gsub("[^0-9.]+", "", air$bathrooms_text))

# Define the selected variables for analysis
selected_variables <- c(
  "host_is_superhost", "host_response_time", "host_response_rate",
  "host_acceptance_rate", "accommodates",
  "bathrooms", "bedrooms", "price", "neighbourhood_cleansed",
  "host_listings_count", "minimum_nights", "maximum_nights",
  "instant_bookable", "host_identity_verified", "availability_30",
  "availability_60", "availability_90", "review_scores_rating",
  "reviews_per_month", "has_availability", "availability_365",
  "number_of_reviews"
)

# Create a new dataframe with only the selected variables
air_new <- air[selected_variables]

# Remove rows with NA values from air_new.
# NOTE(review): only real NA values are removed at this point; text
# placeholders such as the "N/A" factor level are still present and only turn
# into NA once the rate columns are converted to numeric later in the script.
air_clean <- na.omit(air_new)


# Check the structure of air_clean
str(air_clean)
## 'data.frame':    4645 obs. of  22 variables:
##  $ host_is_superhost     : Factor w/ 2 levels "f","t": 2 2 1 1 1 1 1 1 1 2 ...
##  $ host_response_time    : Factor w/ 5 levels "a few days or more",..: 5 5 2 4 2 1 2 2 2 5 ...
##  $ host_response_rate    : Factor w/ 41 levels "0%","100%","11%",..: 2 2 41 2 41 1 41 41 41 2 ...
##  $ host_acceptance_rate  : Factor w/ 79 levels "0%","100%","11%",..: 75 70 79 72 2 79 1 79 79 2 ...
##  $ accommodates          : int  2 2 4 2 4 6 7 2 2 2 ...
##  $ bathrooms             : num  1 1 1 1 1 1 3 1 1 1 ...
##  $ bedrooms              : int  1 1 1 1 2 3 4 1 1 1 ...
##  $ price                 : Factor w/ 566 levels "$1,000.00","$1,029.00",..: 90 83 83 42 294 30 128 551 412 39 ...
##  $ neighbourhood_cleansed: Factor w/ 23 levels "Arbutus Ridge",..: 22 8 8 3 7 6 15 13 13 6 ...
##  $ host_listings_count   : int  1 3 1 4 1 5 1 1 1 1 ...
##  $ minimum_nights        : int  2 30 3 30 3 365 5 5 30 1 ...
##  $ maximum_nights        : int  90 180 7 1125 31 365 29 60 1125 40 ...
##  $ instant_bookable      : Factor w/ 2 levels "f","t": 1 1 2 1 1 1 1 1 1 1 ...
##  $ host_identity_verified: Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ availability_30       : int  18 0 0 1 0 30 10 0 17 0 ...
##  $ availability_60       : int  22 5 0 1 0 60 10 0 35 0 ...
##  $ availability_90       : int  34 6 0 1 0 90 10 0 54 0 ...
##  $ review_scores_rating  : num  4.68 4.92 4.76 4.69 4.57 4 5 4.6 4.54 4.98 ...
##  $ reviews_per_month     : num  2.86 0.67 0.22 1.66 0.12 0.05 0.05 1.6 0.81 3.25 ...
##  $ has_availability      : Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ availability_365      : int  212 152 0 70 194 90 19 0 226 0 ...
##  $ number_of_reviews     : int  443 96 34 265 7 3 7 203 118 466 ...
##  - attr(*, "na.action")= 'omit' Named int [1:1330] 9 23 72 77 83 86 87 92 93 94 ...
##   ..- attr(*, "names")= chr [1:1330] "9" "23" "72" "77" ...
# Convert "price" column to numeric by stripping "$" and "," before parsing
air_clean$price <- as.numeric(gsub("[$,]", "", air_clean$price))

# Parse percentage labels such as "96%" into proportions (0.96).
#
# x: a character vector or factor of percentage labels.
# Returns a numeric vector of the same length. The "N/A" placeholder and empty
# strings are mapped to NA explicitly, so the conversion no longer relies on
# (and no longer emits) "NAs introduced by coercion" warnings; any other
# unparseable value will still raise that warning and so stays visible.
parse_percent <- function(x) {
  labels <- gsub("%", "", as.character(x), fixed = TRUE)
  labels[labels %in% c("N/A", "")] <- NA_character_
  as.numeric(labels) / 100
}

# Convert "host_acceptance_rate" and "host_response_rate" columns to numeric
air_clean$host_acceptance_rate <- parse_percent(air_clean$host_acceptance_rate)
air_clean$host_response_rate <- parse_percent(air_clean$host_response_rate)

# Check the structure of air_clean again
str(air_clean)
## 'data.frame':    4645 obs. of  22 variables:
##  $ host_is_superhost     : Factor w/ 2 levels "f","t": 2 2 1 1 1 1 1 1 1 2 ...
##  $ host_response_time    : Factor w/ 5 levels "a few days or more",..: 5 5 2 4 2 1 2 2 2 5 ...
##  $ host_response_rate    : num  1 1 NA 1 NA 0 NA NA NA 1 ...
##  $ host_acceptance_rate  : num  0.96 0.91 NA 0.93 1 NA 0 NA NA 1 ...
##  $ accommodates          : int  2 2 4 2 4 6 7 2 2 2 ...
##  $ bathrooms             : num  1 1 1 1 1 1 3 1 1 1 ...
##  $ bedrooms              : int  1 1 1 1 2 3 4 1 1 1 ...
##  $ price                 : num  157 150 150 110 350 100 195 94 51 109 ...
##  $ neighbourhood_cleansed: Factor w/ 23 levels "Arbutus Ridge",..: 22 8 8 3 7 6 15 13 13 6 ...
##  $ host_listings_count   : int  1 3 1 4 1 5 1 1 1 1 ...
##  $ minimum_nights        : int  2 30 3 30 3 365 5 5 30 1 ...
##  $ maximum_nights        : int  90 180 7 1125 31 365 29 60 1125 40 ...
##  $ instant_bookable      : Factor w/ 2 levels "f","t": 1 1 2 1 1 1 1 1 1 1 ...
##  $ host_identity_verified: Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ availability_30       : int  18 0 0 1 0 30 10 0 17 0 ...
##  $ availability_60       : int  22 5 0 1 0 60 10 0 35 0 ...
##  $ availability_90       : int  34 6 0 1 0 90 10 0 54 0 ...
##  $ review_scores_rating  : num  4.68 4.92 4.76 4.69 4.57 4 5 4.6 4.54 4.98 ...
##  $ reviews_per_month     : num  2.86 0.67 0.22 1.66 0.12 0.05 0.05 1.6 0.81 3.25 ...
##  $ has_availability      : Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ availability_365      : int  212 152 0 70 194 90 19 0 226 0 ...
##  $ number_of_reviews     : int  443 96 34 265 7 3 7 203 118 466 ...
##  - attr(*, "na.action")= 'omit' Named int [1:1330] 9 23 72 77 83 86 87 92 93 94 ...
##   ..- attr(*, "names")= chr [1:1330] "9" "23" "72" "77" ...
# Print per-column summary statistics for the cleaned data
air_clean %>% summary()
##  host_is_superhost          host_response_time host_response_rate
##  f:2788            a few days or more:  45     Min.   :0.0000    
##  t:1857            N/A               : 807     1st Qu.:1.0000    
##                    within a day      : 230     Median :1.0000    
##                    within a few hours: 618     Mean   :0.9746    
##                    within an hour    :2945     3rd Qu.:1.0000    
##                                                Max.   :1.0000    
##                                                NA's   :807       
##  host_acceptance_rate  accommodates      bathrooms        bedrooms     
##  Min.   :0.0000       Min.   : 1.000   Min.   :0.000   Min.   : 1.000  
##  1st Qu.:0.9100       1st Qu.: 2.000   1st Qu.:1.000   1st Qu.: 1.000  
##  Median :0.9900       Median : 3.000   Median :1.000   Median : 1.000  
##  Mean   :0.9044       Mean   : 3.597   Mean   :1.333   Mean   : 1.612  
##  3rd Qu.:1.0000       3rd Qu.: 4.000   3rd Qu.:1.500   3rd Qu.: 2.000  
##  Max.   :1.0000       Max.   :16.000   Max.   :7.000   Max.   :10.000  
##  NA's   :568                                                           
##      price                     neighbourhood_cleansed host_listings_count
##  Min.   :  14.0   Downtown                :1236       Min.   :  1.000    
##  1st Qu.: 105.0   Kitsilano               : 369       1st Qu.:  1.000    
##  Median : 150.0   West End                : 340       Median :  1.000    
##  Mean   : 190.6   Kensington-Cedar Cottage: 305       Mean   :  8.332    
##  3rd Qu.: 216.0   Mount Pleasant          : 279       3rd Qu.:  4.000    
##  Max.   :9888.0   Downtown Eastside       : 253       Max.   :513.000    
##                   (Other)                 :1863                          
##  minimum_nights   maximum_nights instant_bookable host_identity_verified
##  Min.   :  1.00   Min.   :   1   f:3540           f: 262                
##  1st Qu.:  2.00   1st Qu.:  90   t:1105           t:4383                
##  Median :  3.00   Median : 365                                          
##  Mean   : 14.22   Mean   : 569                                          
##  3rd Qu.: 30.00   3rd Qu.:1125                                          
##  Max.   :399.00   Max.   :1125                                          
##                                                                         
##  availability_30  availability_60 availability_90 review_scores_rating
##  Min.   : 0.000   Min.   : 0.00   Min.   : 0.00   Min.   :0.000       
##  1st Qu.: 0.000   1st Qu.: 0.00   1st Qu.: 4.00   1st Qu.:4.710       
##  Median : 6.000   Median :18.00   Median :31.00   Median :4.870       
##  Mean   : 8.989   Mean   :21.92   Mean   :35.15   Mean   :4.754       
##  3rd Qu.:15.000   3rd Qu.:38.00   3rd Qu.:61.00   3rd Qu.:5.000       
##  Max.   :30.000   Max.   :60.00   Max.   :90.00   Max.   :5.000       
##                                                                       
##  reviews_per_month has_availability availability_365 number_of_reviews
##  Min.   : 0.01     f:  30           Min.   :  0      Min.   :  1.00   
##  1st Qu.: 0.31     t:4615           1st Qu.: 29      1st Qu.:  5.00   
##  Median : 1.00                      Median : 97      Median : 17.00   
##  Mean   : 1.64                      Mean   :130      Mean   : 44.77   
##  3rd Qu.: 2.55                      3rd Qu.:217      3rd Qu.: 55.00   
##  Max.   :11.15                      Max.   :365      Max.   :863.00   
## 
# Calculate the count of bathrooms
bathrooms_count <- table(air_clean$bathrooms)
library(ggplot2)

# Columns are referenced by bare name inside aes() and the facet formulas so
# ggplot2 looks them up in the `data` argument. Writing `air_clean$price`
# there bypasses that lookup and is discouraged by ggplot2, in particular
# together with facetting.

# Price vs. accommodates, one panel per bedroom count
p <- ggplot(air_clean, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bedrooms (Model 1)",
    y = "Price",
    x = "Accommodates"
  )
)

# Price vs. accommodates, one panel per bathroom count
p <- ggplot(air_clean, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bathrooms (Model 1)",
    y = "Price",
    x = "Accommodates"
  )
)

# Price vs. number of bathrooms
p <- ggplot(air_clean, aes(x = bathrooms, y = price)) +
  geom_point()
print(
  p + labs(
    title = "Price compared to bathrooms (Model 1)",
    y = "Price",
    x = "Bathrooms"
  )
)

# Price vs. number of reviews
p <- ggplot(air_clean, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(
  p + labs(
    title = "Price compared to the number of reviews (Model 1)",
    y = "Price",
    x = "Number of reviews"
  )
)

# Calculate correlation matrix: expand every column of air_clean into numeric
# design-matrix columns (factors become 0/1 indicator columns, no intercept)
# and correlate all pairs of columns.
# NOTE(review): the "standard deviation is zero" warning indicates at least
# one constant column, presumably an indicator for a factor level that has no
# rows left -- confirm.
correlation_matrix <- cor(
  model.matrix(~ 0 + ., data = air_clean),
  use = "pairwise.complete.obs"
)
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is
## zero
# Pull out the row of the correlation matrix that belongs to the response
# variable (price)
review_correlations <- correlation_matrix["price", ]

# Order the correlations from most positive to most negative
sorted_correlations <- review_correlations %>% sort(decreasing = TRUE)

# Put predictor names and their correlation with price into a data frame
correlation_df <- data.frame(
  Predictor_Variable = names(sorted_correlations),
  Correlation_with_y = sorted_correlations
)

# Show plain decimals instead of scientific notation, then print the table
options(scipen = 999)
print(correlation_df)
##                                                                            Predictor_Variable
## price                                                                                   price
## bathrooms                                                                           bathrooms
## bedrooms                                                                             bedrooms
## accommodates                                                                     accommodates
## review_scores_rating                                                     review_scores_rating
## neighbourhood_cleansedDowntown                                 neighbourhood_cleansedDowntown
## neighbourhood_cleansedWest Point Grey                   neighbourhood_cleansedWest Point Grey
## availability_30                                                               availability_30
## availability_60                                                               availability_60
## availability_365                                                             availability_365
## neighbourhood_cleansedKitsilano                               neighbourhood_cleansedKitsilano
## availability_90                                                               availability_90
## neighbourhood_cleansedMount Pleasant                     neighbourhood_cleansedMount Pleasant
## neighbourhood_cleansedOakridge                                 neighbourhood_cleansedOakridge
## maximum_nights                                                                 maximum_nights
## host_acceptance_rate                                                     host_acceptance_rate
## host_is_superhostt                                                         host_is_superhostt
## minimum_nights                                                                 minimum_nights
## host_listings_count                                                       host_listings_count
## neighbourhood_cleansedWest End                                 neighbourhood_cleansedWest End
## neighbourhood_cleansedKerrisdale                             neighbourhood_cleansedKerrisdale
## host_response_timewithin an hour                             host_response_timewithin an hour
## host_response_timewithin a day                                 host_response_timewithin a day
## instant_bookablet                                                           instant_bookablet
## host_response_timewithin a few hours                     host_response_timewithin a few hours
## has_availabilityt                                                           has_availabilityt
## neighbourhood_cleansedShaughnessy                           neighbourhood_cleansedShaughnessy
## neighbourhood_cleansedStrathcona                             neighbourhood_cleansedStrathcona
## neighbourhood_cleansedDowntown Eastside               neighbourhood_cleansedDowntown Eastside
## neighbourhood_cleansedDunbar Southlands               neighbourhood_cleansedDunbar Southlands
## neighbourhood_cleansedFairview                                 neighbourhood_cleansedFairview
## neighbourhood_cleansedRiley Park                             neighbourhood_cleansedRiley Park
## host_is_superhostf                                                         host_is_superhostf
## host_response_rate                                                         host_response_rate
## neighbourhood_cleansedGrandview-Woodland             neighbourhood_cleansedGrandview-Woodland
## neighbourhood_cleansedSouth Cambie                         neighbourhood_cleansedSouth Cambie
## neighbourhood_cleansedSunset                                     neighbourhood_cleansedSunset
## neighbourhood_cleansedKillarney                               neighbourhood_cleansedKillarney
## neighbourhood_cleansedKensington-Cedar Cottage neighbourhood_cleansedKensington-Cedar Cottage
## neighbourhood_cleansedVictoria-Fraserview           neighbourhood_cleansedVictoria-Fraserview
## reviews_per_month                                                           reviews_per_month
## neighbourhood_cleansedMarpole                                   neighbourhood_cleansedMarpole
## host_identity_verifiedt                                               host_identity_verifiedt
## neighbourhood_cleansedHastings-Sunrise                 neighbourhood_cleansedHastings-Sunrise
## neighbourhood_cleansedRenfrew-Collingwood           neighbourhood_cleansedRenfrew-Collingwood
## number_of_reviews                                                           number_of_reviews
##                                                Correlation_with_y
## price                                                 1.000000000
## bathrooms                                             0.395649452
## bedrooms                                              0.395114285
## accommodates                                          0.380550792
## review_scores_rating                                  0.071042539
## neighbourhood_cleansedDowntown                        0.060304372
## neighbourhood_cleansedWest Point Grey                 0.058879237
## availability_30                                       0.055708574
## availability_60                                       0.048433605
## availability_365                                      0.048410249
## neighbourhood_cleansedKitsilano                       0.045056580
## availability_90                                       0.038783855
## neighbourhood_cleansedMount Pleasant                  0.026480149
## neighbourhood_cleansedOakridge                        0.024402039
## maximum_nights                                        0.021202133
## host_acceptance_rate                                  0.019713798
## host_is_superhostt                                    0.018904712
## minimum_nights                                        0.013038382
## host_listings_count                                   0.011821623
## neighbourhood_cleansedWest End                        0.004889116
## neighbourhood_cleansedKerrisdale                      0.004336057
## host_response_timewithin an hour                     -0.001229694
## host_response_timewithin a day                       -0.001977631
## instant_bookablet                                    -0.002355934
## host_response_timewithin a few hours                 -0.002934061
## has_availabilityt                                    -0.003912086
## neighbourhood_cleansedShaughnessy                    -0.006306109
## neighbourhood_cleansedStrathcona                     -0.006593262
## neighbourhood_cleansedDowntown Eastside              -0.006655020
## neighbourhood_cleansedDunbar Southlands              -0.008912073
## neighbourhood_cleansedFairview                       -0.013810626
## neighbourhood_cleansedRiley Park                     -0.014944620
## host_is_superhostf                                   -0.018904712
## host_response_rate                                   -0.020247048
## neighbourhood_cleansedGrandview-Woodland             -0.020587907
## neighbourhood_cleansedSouth Cambie                   -0.021280211
## neighbourhood_cleansedSunset                         -0.021825216
## neighbourhood_cleansedKillarney                      -0.024146890
## neighbourhood_cleansedKensington-Cedar Cottage       -0.030649981
## neighbourhood_cleansedVictoria-Fraserview            -0.030908505
## reviews_per_month                                    -0.036048000
## neighbourhood_cleansedMarpole                        -0.042170158
## host_identity_verifiedt                              -0.052983328
## neighbourhood_cleansedHastings-Sunrise               -0.053412240
## neighbourhood_cleansedRenfrew-Collingwood            -0.057621921
## number_of_reviews                                    -0.058361420
#install.packages("plotly")
# library() stops with an error when plotly is not installed, whereas
# require() only returns FALSE and lets the script fail later at plot_ly().
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Plot margins (l/r/b/t) and padding passed to the plotly layout
m <- list(l = 10, r = 10, b = 10, t = 10, pad = 1)

# Interactive heatmap of the correlation matrix on a blue-white-red colour ramp
heatmap <- layout(
  plot_ly(
    x = colnames(correlation_matrix),
    y = rownames(correlation_matrix),
    z = correlation_matrix,
    type = "heatmap",
    colors = colorRamp(c("darkblue", "white", "darkred"))
  ),
  margin = m
)

#save graph as an html
#htmlwidgets::saveWidget(as_widget(heatmap), "heatmap.html")

# Render the widget
heatmap

This is the kitchen-sink OLS model. It includes all of the variables without any data transformations.

# lm() lives in the stats package
library(stats)


# Fit an OLS regression of price on the untransformed predictors listed below.
# NOTE(review): availability_365 and number_of_reviews are part of
# selected_variables but do not appear in this formula -- confirm whether
# leaving them out is intended.
model <- lm(
  formula = price ~ host_is_superhost + host_response_time +
    host_response_rate + host_acceptance_rate +
    accommodates + bathrooms + bedrooms +
    neighbourhood_cleansed + host_listings_count +
    minimum_nights + maximum_nights +
    instant_bookable + host_identity_verified +
    availability_30 + availability_60 + availability_90 +
    review_scores_rating + reviews_per_month + has_availability,
  data = air_clean
)

# Show coefficients, residual summary and fit statistics
summary(model)
## 
## Call:
## lm(formula = price ~ host_is_superhost + host_response_time + 
##     host_response_rate + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + host_listings_count + 
##     minimum_nights + maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability, data = air_clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -594.8  -53.8  -11.9   29.0 8402.0 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                    -68.614897  80.456209  -0.853
## host_is_superhostt                              14.380850   6.725793   2.138
## host_response_timewithin a day                 -41.441943  52.046970  -0.796
## host_response_timewithin a few hours           -20.990902  55.245538  -0.380
## host_response_timewithin an hour               -31.802305  55.852283  -0.569
## host_response_rate                             -19.117751  57.747157  -0.331
## host_acceptance_rate                            28.529234  21.917586   1.302
## accommodates                                    14.191734   2.955346   4.802
## bathrooms                                       62.416952   7.547781   8.270
## bedrooms                                        36.662354   7.413905   4.945
## neighbourhood_cleansedDowntown                  20.579325  26.927630   0.764
## neighbourhood_cleansedDowntown Eastside          3.340701  29.384469   0.114
## neighbourhood_cleansedDunbar Southlands        -32.701465  31.968067  -1.023
## neighbourhood_cleansedFairview                  -4.612242  33.123843  -0.139
## neighbourhood_cleansedGrandview-Woodland       -29.375280  30.776075  -0.954
## neighbourhood_cleansedHastings-Sunrise         -57.948567  30.495717  -1.900
## neighbourhood_cleansedKensington-Cedar Cottage -37.073429  29.047622  -1.276
## neighbourhood_cleansedKerrisdale               -60.591455  37.450737  -1.618
## neighbourhood_cleansedKillarney                -79.350043  37.557899  -2.113
## neighbourhood_cleansedKitsilano                 24.905495  28.393454   0.877
## neighbourhood_cleansedMarpole                  -64.347936  32.738124  -1.966
## neighbourhood_cleansedMount Pleasant            31.366633  29.036472   1.080
## neighbourhood_cleansedOakridge                  -4.108600  35.524962  -0.116
## neighbourhood_cleansedRenfrew-Collingwood      -51.344649  30.056500  -1.708
## neighbourhood_cleansedRiley Park               -21.267632  29.282984  -0.726
## neighbourhood_cleansedShaughnessy              -14.297555  35.721561  -0.400
## neighbourhood_cleansedSouth Cambie             -24.227784  37.262393  -0.650
## neighbourhood_cleansedStrathcona                -6.936857  46.625311  -0.149
## neighbourhood_cleansedSunset                   -55.621589  33.615909  -1.655
## neighbourhood_cleansedVictoria-Fraserview      -93.116159  34.748721  -2.680
## neighbourhood_cleansedWest End                  18.671541  28.765937   0.649
## neighbourhood_cleansedWest Point Grey           74.206770  36.685625   2.023
## host_listings_count                              0.035374   0.100269   0.353
## minimum_nights                                   0.227415   0.153342   1.483
## maximum_nights                                   0.001765   0.006622   0.267
## instant_bookablet                               -8.478193   7.538493  -1.125
## host_identity_verifiedt                        -64.831607  17.255350  -3.757
## availability_30                                  1.700694   0.768408   2.213
## availability_60                                 -0.345842   0.757097  -0.457
## availability_90                                  0.076607   0.379661   0.202
## review_scores_rating                            33.004020   7.778605   4.243
## reviews_per_month                               -5.164100   2.054387  -2.514
## has_availabilityt                              -15.623611  54.670534  -0.286
##                                                            Pr(>|t|)    
## (Intercept)                                                0.393812    
## host_is_superhostt                                         0.032567 *  
## host_response_timewithin a day                             0.425942    
## host_response_timewithin a few hours                       0.703999    
## host_response_timewithin an hour                           0.569118    
## host_response_rate                                         0.740618    
## host_acceptance_rate                                       0.193112    
## accommodates                                            0.000001631 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                                0.000000794 ***
## neighbourhood_cleansedDowntown                             0.444769    
## neighbourhood_cleansedDowntown Eastside                    0.909490    
## neighbourhood_cleansedDunbar Southlands                    0.306401    
## neighbourhood_cleansedFairview                             0.889266    
## neighbourhood_cleansedGrandview-Woodland                   0.339900    
## neighbourhood_cleansedHastings-Sunrise                     0.057480 .  
## neighbourhood_cleansedKensington-Cedar Cottage             0.201929    
## neighbourhood_cleansedKerrisdale                           0.105768    
## neighbourhood_cleansedKillarney                            0.034689 *  
## neighbourhood_cleansedKitsilano                            0.380458    
## neighbourhood_cleansedMarpole                              0.049426 *  
## neighbourhood_cleansedMount Pleasant                       0.280100    
## neighbourhood_cleansedOakridge                             0.907933    
## neighbourhood_cleansedRenfrew-Collingwood                  0.087668 .  
## neighbourhood_cleansedRiley Park                           0.467712    
## neighbourhood_cleansedShaughnessy                          0.688995    
## neighbourhood_cleansedSouth Cambie                         0.515606    
## neighbourhood_cleansedStrathcona                           0.881736    
## neighbourhood_cleansedSunset                               0.098085 .  
## neighbourhood_cleansedVictoria-Fraserview                  0.007401 ** 
## neighbourhood_cleansedWest End                             0.516323    
## neighbourhood_cleansedWest Point Grey                      0.043167 *  
## host_listings_count                                        0.724261    
## minimum_nights                                             0.138142    
## maximum_nights                                             0.789848    
## instant_bookablet                                          0.260807    
## host_identity_verifiedt                                    0.000174 ***
## availability_30                                            0.026939 *  
## availability_60                                            0.647841    
## availability_90                                            0.840101    
## review_scores_rating                                    0.000022592 ***
## reviews_per_month                                          0.011989 *  
## has_availabilityt                                          0.775064    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 188 on 3775 degrees of freedom
##   (827 observations deleted due to missingness)
## Multiple R-squared:  0.2236, Adjusted R-squared:  0.2149 
## F-statistic: 25.88 on 42 and 3775 DF,  p-value: < 0.00000000000000022
# Drop every row that still contains a missing value. step() refits many
# candidate models and needs all of them to use the same set of rows.
air_clean <- stats::na.omit(air_clean)


# Stepwise selection by AIC, starting from the full model. No `scope` is
# supplied, so the search only ever removes terms; `direction = "backward"`
# simply states that default explicitly.
step_model <- stats::step(model, direction = "backward")
## Start:  AIC=40029.87
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - host_response_time      3    103632 133584511 40027
## - availability_90         1      1440 133482318 40028
## - maximum_nights          1      2512 133483390 40028
## - has_availability        1      2888 133483766 40028
## - host_response_rate      1      3875 133484754 40028
## - host_listings_count     1      4401 133485280 40028
## - availability_60         1      7378 133488257 40028
## - instant_bookable        1     44724 133525602 40029
## - host_acceptance_rate    1     59910 133540788 40030
## <none>                                133480879 40030
## - minimum_nights          1     77771 133558650 40030
## - host_is_superhost       1    161653 133642532 40032
## - availability_30         1    173209 133654088 40033
## - reviews_per_month       1    223423 133704301 40034
## - host_identity_verified  1    499147 133980025 40042
## - review_scores_rating    1    636550 134117428 40046
## - accommodates            1    815373 134296252 40051
## - bedrooms                1    864667 134345545 40053
## - bathrooms               1   2418068 135898947 40096
## - neighbourhood_cleansed 22   4277486 137758365 40106
## 
## Step:  AIC=40026.83
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month + 
##     has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - availability_90         1      1249 133585759 40025
## - maximum_nights          1      1274 133585785 40025
## - has_availability        1      2765 133587275 40025
## - availability_60         1      7121 133591632 40025
## - host_listings_count     1      7176 133591686 40025
## - host_response_rate      1     48400 133632910 40026
## - instant_bookable        1     50791 133635302 40026
## - host_acceptance_rate    1     57199 133641710 40026
## <none>                                133584511 40027
## - minimum_nights          1     80187 133664697 40027
## - host_is_superhost       1    165439 133749949 40030
## - availability_30         1    168454 133752964 40030
## - reviews_per_month       1    246044 133830555 40032
## - host_identity_verified  1    491348 134075858 40039
## - review_scores_rating    1    664729 134249239 40044
## - accommodates            1    801141 134385651 40048
## - bedrooms                1    870305 134454816 40050
## - bathrooms               1   2408446 135992957 40093
## - neighbourhood_cleansed 22   4255500 137840011 40103
## 
## Step:  AIC=40024.86
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - maximum_nights          1      1350 133587109 40023
## - has_availability        1      2821 133588581 40023
## - host_listings_count     1      7547 133593307 40023
## - availability_60         1     12835 133598594 40023
## - host_response_rate      1     48995 133634755 40024
## - instant_bookable        1     50699 133636459 40024
## - host_acceptance_rate    1     58179 133643939 40025
## <none>                                133585759 40025
## - minimum_nights          1     81354 133667114 40025
## - host_is_superhost       1    164325 133750084 40028
## - availability_30         1    187114 133772874 40028
## - reviews_per_month       1    246986 133832745 40030
## - host_identity_verified  1    491180 134076939 40037
## - review_scores_rating    1    664541 134250300 40042
## - accommodates            1    802810 134388569 40046
## - bedrooms                1    869188 134454947 40048
## - bathrooms               1   2407454 135993214 40091
## - neighbourhood_cleansed 22   4254649 137840408 40101
## 
## Step:  AIC=40022.9
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - has_availability        1      2752 133589862 40021
## - host_listings_count     1      7540 133594650 40021
## - availability_60         1     12710 133599820 40021
## - host_response_rate      1     48941 133636051 40022
## - instant_bookable        1     51188 133638297 40022
## - host_acceptance_rate    1     58450 133645559 40023
## <none>                                133587109 40023
## - minimum_nights          1     84322 133671431 40023
## - host_is_superhost       1    166379 133753488 40026
## - availability_30         1    186339 133773448 40026
## - reviews_per_month       1    249971 133837081 40028
## - host_identity_verified  1    490176 134077285 40035
## - review_scores_rating    1    663193 134250302 40040
## - accommodates            1    803547 134390657 40044
## - bedrooms                1    871136 134458245 40046
## - bathrooms               1   2406207 135993316 40089
## - neighbourhood_cleansed 22   4269346 137856455 40099
## 
## Step:  AIC=40020.98
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - host_listings_count     1      7540 133597402 40019
## - availability_60         1     12735 133602597 40019
## - host_response_rate      1     48770 133638632 40020
## - instant_bookable        1     51999 133641861 40020
## - host_acceptance_rate    1     58768 133648630 40021
## <none>                                133589862 40021
## - minimum_nights          1     84056 133673917 40021
## - host_is_superhost       1    168174 133758036 40024
## - availability_30         1    186688 133776550 40024
## - reviews_per_month       1    251466 133841328 40026
## - host_identity_verified  1    489679 134079541 40033
## - review_scores_rating    1    661848 134251709 40038
## - accommodates            1    806231 134396092 40042
## - bedrooms                1    868584 134458446 40044
## - bathrooms               1   2412954 136002816 40087
## - neighbourhood_cleansed 22   4268184 137858045 40097
## 
## Step:  AIC=40019.2
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - availability_60         1     11734 133609135 40018
## - host_response_rate      1     47650 133645052 40019
## - instant_bookable        1     48896 133646297 40019
## - host_acceptance_rate    1     59106 133656508 40019
## <none>                                133597402 40019
## - minimum_nights          1     85555 133682957 40020
## - host_is_superhost       1    162204 133759606 40022
## - availability_30         1    183626 133781028 40022
## - reviews_per_month       1    259107 133856509 40025
## - host_identity_verified  1    485061 134082463 40031
## - review_scores_rating    1    656489 134253891 40036
## - accommodates            1    802342 134399744 40040
## - bedrooms                1    874892 134472293 40042
## - bathrooms               1   2411066 136008468 40085
## - neighbourhood_cleansed 22   4326652 137924054 40097
## 
## Step:  AIC=40017.53
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - host_response_rate      1     46305 133655440 40017
## - instant_bookable        1     50771 133659906 40017
## - host_acceptance_rate    1     54992 133664127 40017
## <none>                                133609135 40018
## - minimum_nights          1     84750 133693885 40018
## - host_is_superhost       1    165106 133774242 40020
## - reviews_per_month       1    265876 133875012 40023
## - host_identity_verified  1    489228 134098363 40029
## - availability_30         1    525995 134135130 40031
## - review_scores_rating    1    658725 134267861 40034
## - accommodates            1    799044 134408179 40038
## - bedrooms                1    877465 134486601 40041
## - bathrooms               1   2408466 136017601 40084
## - neighbourhood_cleansed 22   4322438 137931573 40095
## 
## Step:  AIC=40016.86
## price ~ host_is_superhost + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + minimum_nights + 
##     instant_bookable + host_identity_verified + availability_30 + 
##     review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - host_acceptance_rate    1     30657 133686097 40016
## - instant_bookable        1     50515 133705955 40016
## <none>                                133655440 40017
## - minimum_nights          1     83346 133738786 40017
## - host_is_superhost       1    158757 133814197 40019
## - reviews_per_month       1    270254 133925694 40023
## - host_identity_verified  1    495639 134151078 40029
## - availability_30         1    545372 134200812 40030
## - review_scores_rating    1    651864 134307304 40033
## - accommodates            1    798875 134454315 40038
## - bedrooms                1    876113 134531552 40040
## - bathrooms               1   2417322 136072762 40083
## - neighbourhood_cleansed 22   4355970 138011410 40095
## 
## Step:  AIC=40015.73
## price ~ host_is_superhost + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - instant_bookable        1     37896 133723993 40015
## <none>                                133686097 40016
## - minimum_nights          1     73129 133759226 40016
## - host_is_superhost       1    192539 133878636 40019
## - reviews_per_month       1    245126 133931223 40021
## - host_identity_verified  1    500407 134186504 40028
## - availability_30         1    548591 134234688 40029
## - review_scores_rating    1    651690 134337787 40032
## - accommodates            1    828624 134514721 40037
## - bedrooms                1    870637 134556734 40039
## - bathrooms               1   2399211 136085308 40082
## - neighbourhood_cleansed 22   4334411 138020508 40094
## 
## Step:  AIC=40014.81
## price ~ host_is_superhost + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + minimum_nights + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## <none>                                133723993 40015
## - minimum_nights          1     77720 133801713 40015
## - host_is_superhost       1    208509 133932502 40019
## - reviews_per_month       1    269104 133993097 40020
## - host_identity_verified  1    495765 134219758 40027
## - availability_30         1    532607 134256600 40028
## - review_scores_rating    1    681435 134405428 40032
## - accommodates            1    813727 134537720 40036
## - bedrooms                1    881805 134605798 40038
## - bathrooms               1   2397946 136121939 40081
## - neighbourhood_cleansed 22   4318983 138042976 40092
# Coefficient table and fit statistics for the model chosen by stepwise selection
step_model %>% summary()
## 
## Call:
## lm(formula = price ~ host_is_superhost + accommodates + bathrooms + 
##     bedrooms + neighbourhood_cleansed + minimum_nights + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month, 
##     data = air_clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -591.5  -53.0  -12.8   29.5 8411.0 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    -112.0849    49.0409  -2.286
## host_is_superhostt                               15.7748     6.4926   2.430
## accommodates                                     14.0895     2.9354   4.800
## bathrooms                                        62.0460     7.5302   8.240
## bedrooms                                         36.9237     7.3898   4.997
## neighbourhood_cleansedDowntown                   19.7935    26.8794   0.736
## neighbourhood_cleansedDowntown Eastside           4.1203    29.3261   0.140
## neighbourhood_cleansedDunbar Southlands         -33.0215    31.8826  -1.036
## neighbourhood_cleansedFairview                   -4.7947    33.0391  -0.145
## neighbourhood_cleansedGrandview-Woodland        -29.9364    30.6737  -0.976
## neighbourhood_cleansedHastings-Sunrise          -57.0590    30.4421  -1.874
## neighbourhood_cleansedKensington-Cedar Cottage  -38.7957    28.9638  -1.339
## neighbourhood_cleansedKerrisdale                -60.4145    37.4025  -1.615
## neighbourhood_cleansedKillarney                 -80.8849    37.4822  -2.158
## neighbourhood_cleansedKitsilano                  23.9052    28.3344   0.844
## neighbourhood_cleansedMarpole                   -65.3931    32.7020  -2.000
## neighbourhood_cleansedMount Pleasant             31.4575    29.0042   1.085
## neighbourhood_cleansedOakridge                   -5.1763    35.4228  -0.146
## neighbourhood_cleansedRenfrew-Collingwood       -52.4513    29.9687  -1.750
## neighbourhood_cleansedRiley Park                -20.5983    29.2187  -0.705
## neighbourhood_cleansedShaughnessy               -20.1101    35.3966  -0.568
## neighbourhood_cleansedSouth Cambie              -24.6347    37.1773  -0.663
## neighbourhood_cleansedStrathcona                 -8.0929    46.5123  -0.174
## neighbourhood_cleansedSunset                    -56.7506    33.5279  -1.693
## neighbourhood_cleansedVictoria-Fraserview       -92.5127    34.6655  -2.669
## neighbourhood_cleansedWest End                   18.6769    28.6604   0.652
## neighbourhood_cleansedWest Point Grey            70.9888    36.6270   1.938
## minimum_nights                                    0.2237     0.1508   1.483
## host_identity_verifiedt                         -64.4253    17.1962  -3.746
## availability_30                                   1.2442     0.3204   3.883
## review_scores_rating                             33.7967     7.6944   4.392
## reviews_per_month                                -5.4174     1.9627  -2.760
##                                                            Pr(>|t|)    
## (Intercept)                                                0.022336 *  
## host_is_superhostt                                         0.015159 *  
## accommodates                                   0.000001649396864246 ***
## bathrooms                                      0.000000000000000236 ***
## bedrooms                                       0.000000609972329033 ***
## neighbourhood_cleansedDowntown                             0.461544    
## neighbourhood_cleansedDowntown Eastside                    0.888273    
## neighbourhood_cleansedDunbar Southlands                    0.300399    
## neighbourhood_cleansedFairview                             0.884622    
## neighbourhood_cleansedGrandview-Woodland                   0.329146    
## neighbourhood_cleansedHastings-Sunrise                     0.060960 .  
## neighbourhood_cleansedKensington-Cedar Cottage             0.180503    
## neighbourhood_cleansedKerrisdale                           0.106339    
## neighbourhood_cleansedKillarney                            0.030994 *  
## neighbourhood_cleansedKitsilano                            0.398902    
## neighbourhood_cleansedMarpole                              0.045608 *  
## neighbourhood_cleansedMount Pleasant                       0.278176    
## neighbourhood_cleansedOakridge                             0.883828    
## neighbourhood_cleansedRenfrew-Collingwood                  0.080164 .  
## neighbourhood_cleansedRiley Park                           0.480872    
## neighbourhood_cleansedShaughnessy                          0.569976    
## neighbourhood_cleansedSouth Cambie                         0.507610    
## neighbourhood_cleansedStrathcona                           0.861878    
## neighbourhood_cleansedSunset                               0.090606 .  
## neighbourhood_cleansedVictoria-Fraserview                  0.007646 ** 
## neighbourhood_cleansedWest End                             0.514659    
## neighbourhood_cleansedWest Point Grey                      0.052678 .  
## minimum_nights                                             0.138057    
## host_identity_verifiedt                                    0.000182 ***
## availability_30                                            0.000105 ***
## review_scores_rating                           0.000011519513171064 ***
## reviews_per_month                                          0.005804 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 187.9 on 3786 degrees of freedom
## Multiple R-squared:  0.2222, Adjusted R-squared:  0.2158 
## F-statistic: 34.88 on 31 and 3786 DF,  p-value: < 0.00000000000000022
# Scatter plot of price against guest capacity, one panel per bedroom count.
# Columns are given by bare name: ggplot2 looks them up in `air_clean`, which
# avoids the "Use of `air_clean$price` is discouraged" warning that `$` inside
# aes() triggers.
p <- ggplot(air_clean, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bedrooms (Model 2)",
    y = "Price",
    x = "Accommodates"
  )
)

# Price against guest capacity, one panel per bathroom count. Points are drawn
# first and a boxplot per `accommodates` value is layered on top.
# Columns are given by bare name so ggplot2 resolves them inside `air_clean`
# instead of warning about `$` in aes().
p <- ggplot(air_clean, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms) +
  geom_boxplot(aes(group = accommodates))
print(
  p + labs(
    title = "Price compared to accommodation size split by bathrooms (Model 2)",
    y = "Price",
    x = "Accommodates"
  )
)

# Distribution of price for each bathroom count. `bathrooms` is numeric, so
# `group` is set explicitly to get one box per distinct value.
# Columns are given by bare name so ggplot2 resolves them inside `air_clean`
# instead of warning about `$` in aes().
p <- ggplot(air_clean, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms))
print(
  p + labs(
    title = "Price compared to bathrooms (Model 2)",
    y = "Price",
    x = "Bathrooms"
  )
)

# Scatter plot of price against the number of reviews.
# Columns are given by bare name so ggplot2 resolves them inside `air_clean`
# instead of warning about `$` in aes().
# NOTE(review): assumes `air_clean` still carries a `number_of_reviews` column;
# it is not one of the model terms, so confirm it was kept when building
# `air_clean`.
p <- ggplot(air_clean, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(
  p + labs(
    title = "Price compared to the number of reviews (Model 2)",
    y = "Price",
    x = "Number of reviews"
  )
)

In this chunk of code, I removed the non-significant neighbourhoods.

# Neighbourhoods retained for the reduced model
significant_neighborhoods <- c(
  "Downtown",
  "Downtown Eastside",
  "Kitsilano",
  "Hastings-Sunrise",
  "Kerrisdale",
  "Killarney",
  "Marpole",
  "Mount Pleasant",
  "Oakridge",
  "Renfrew-Collingwood",
  "Sunset",
  "Victoria-Fraserview",
  "West End"
)

# Keep only the listings located in one of the retained neighbourhoods
air_clean_sig <- dplyr::filter(
  air_clean,
  neighbourhood_cleansed %in% significant_neighborhoods
)

# Refit the full linear model of price on the reduced data set. The formula is
# written inline so that the call stored in the fitted object stays
# self-contained.
lm_model_significant_neighborhoods <- lm(
  price ~ host_is_superhost + host_response_time + host_response_rate +
    host_acceptance_rate + accommodates + bathrooms + bedrooms +
    neighbourhood_cleansed + host_listings_count + minimum_nights +
    maximum_nights + instant_bookable + host_identity_verified +
    availability_30 + availability_60 + availability_90 +
    review_scores_rating + reviews_per_month + has_availability,
  data = air_clean_sig
)



# Stepwise selection by AIC on the reduced-neighbourhood model. No `scope` is
# supplied, so the search only ever removes terms; `direction = "backward"`
# simply states that default explicitly.
step_model2 <- stats::step(
  lm_model_significant_neighborhoods,
  direction = "backward"
)
## Start:  AIC=29951.86
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - host_response_time      3    101525 119806674 29948
## - availability_60         1      1541 119706690 29950
## - maximum_nights          1      1962 119707111 29950
## - availability_90         1      2296 119707445 29950
## - has_availability        1      7962 119713111 29950
## - host_response_rate      1      9414 119714563 29950
## - host_listings_count     1     21399 119726548 29950
## - reviews_per_month       1     35951 119741100 29951
## - host_acceptance_rate    1     75517 119780666 29952
## <none>                                119705149 29952
## - instant_bookable        1     91047 119796196 29952
## - availability_30         1    105595 119810744 29952
## - host_is_superhost       1    164460 119869609 29954
## - host_identity_verified  1    339175 120044324 29958
## - minimum_nights          1    374516 120079665 29959
## - bedrooms                1    452026 120157174 29960
## - review_scores_rating    1    546572 120251721 29963
## - accommodates            1    596566 120301715 29964
## - bathrooms               1   1888585 121593733 29994
## - neighbourhood_cleansed 12   3208111 122913260 30002
## 
## Step:  AIC=29948.23
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month + 
##     has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - maximum_nights          1       873 119807546 29946
## - availability_60         1      1430 119808103 29946
## - availability_90         1      2060 119808734 29946
## - has_availability        1      7384 119814057 29946
## - host_listings_count     1     28981 119835655 29947
## - reviews_per_month       1     43048 119849721 29947
## - host_response_rate      1     55112 119861786 29948
## - host_acceptance_rate    1     72992 119879665 29948
## <none>                                119806674 29948
## - instant_bookable        1    101090 119907764 29949
## - availability_30         1    102165 119908839 29949
## - host_is_superhost       1    164373 119971047 29950
## - host_identity_verified  1    327028 120133702 29954
## - minimum_nights          1    380920 120187593 29955
## - bedrooms                1    454675 120261349 29957
## - review_scores_rating    1    570650 120377323 29960
## - accommodates            1    574102 120380776 29960
## - bathrooms               1   1893253 121699927 29990
## - neighbourhood_cleansed 12   3195984 123002658 29998
## 
## Step:  AIC=29946.25
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month + 
##     has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - availability_60         1      1528 119809074 29944
## - availability_90         1      2192 119809738 29944
## - has_availability        1      7354 119814900 29944
## - host_listings_count     1     29050 119836596 29945
## - reviews_per_month       1     44116 119851662 29945
## - host_response_rate      1     55100 119862647 29946
## - host_acceptance_rate    1     73009 119880555 29946
## <none>                                119807546 29946
## - instant_bookable        1    102209 119909755 29947
## - availability_30         1    102233 119909779 29947
## - host_is_superhost       1    166656 119974202 29948
## - host_identity_verified  1    326521 120134067 29952
## - minimum_nights          1    386681 120194228 29953
## - bedrooms                1    457110 120264657 29955
## - review_scores_rating    1    569840 120377387 29958
## - accommodates            1    573793 120381339 29958
## - bathrooms               1   1892509 121700056 29988
## - neighbourhood_cleansed 12   3201343 123008890 29996
## 
## Step:  AIC=29944.29
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_90 + 
##     review_scores_rating + reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - availability_90         1       684 119809758 29942
## - has_availability        1      7609 119816683 29943
## - host_listings_count     1     29490 119838564 29943
## - reviews_per_month       1     44852 119853926 29943
## - host_response_rate      1     55376 119864450 29944
## - host_acceptance_rate    1     72955 119882029 29944
## <none>                                119809074 29944
## - instant_bookable        1    102431 119911505 29945
## - host_is_superhost       1    165467 119974540 29946
## - availability_30         1    184374 119993448 29947
## - host_identity_verified  1    326405 120135478 29950
## - minimum_nights          1    388938 120198012 29951
## - bedrooms                1    456518 120265592 29953
## - review_scores_rating    1    570802 120379876 29956
## - accommodates            1    573783 120382857 29956
## - bathrooms               1   1891705 121700779 29986
## - neighbourhood_cleansed 12   3200402 123009476 29994
## 
## Step:  AIC=29942.31
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating + 
##     reviews_per_month + has_availability
## 
##                          Df Sum of Sq       RSS   AIC
## - has_availability        1      7639 119817397 29941
## - host_listings_count     1     30268 119840026 29941
## - reviews_per_month       1     44519 119854277 29941
## - host_response_rate      1     56048 119865806 29942
## - host_acceptance_rate    1     74802 119884560 29942
## <none>                                119809758 29942
## - instant_bookable        1    101962 119911720 29943
## - host_is_superhost       1    164806 119974564 29944
## - host_identity_verified  1    326146 120135904 29948
## - minimum_nights          1    389738 120199496 29949
## - bedrooms                1    456234 120265992 29951
## - availability_30         1    502692 120312450 29952
## - review_scores_rating    1    570724 120380482 29954
## - accommodates            1    575506 120385263 29954
## - bathrooms               1   1891289 121701047 29984
## - neighbourhood_cleansed 12   3200310 123010068 29992
## 
## Step:  AIC=29940.48
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - host_listings_count     1     30175 119847571 29939
## - reviews_per_month       1     45622 119863018 29940
## - host_response_rate      1     55535 119872932 29940
## - host_acceptance_rate    1     75002 119892398 29940
## <none>                                119817397 29941
## - instant_bookable        1    103716 119921112 29941
## - host_is_superhost       1    167346 119984742 29942
## - host_identity_verified  1    325405 120142801 29946
## - minimum_nights          1    388735 120206131 29948
## - bedrooms                1    453820 120271217 29949
## - availability_30         1    505903 120323300 29950
## - review_scores_rating    1    567431 120384828 29952
## - accommodates            1    579283 120396680 29952
## - bathrooms               1   1890728 121708124 29982
## - neighbourhood_cleansed 12   3201610 123019007 29990
## 
## Step:  AIC=29939.19
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq       RSS   AIC
## - reviews_per_month       1     51904 119899476 29938
## - host_response_rate      1     53084 119900655 29938
## - host_acceptance_rate    1     75862 119923434 29939
## <none>                                119847571 29939
## - instant_bookable        1     95542 119943113 29939
## - host_is_superhost       1    152961 120000532 29941
## - host_identity_verified  1    316752 120164323 29945
## - minimum_nights          1    393691 120241262 29946
## - bedrooms                1    463063 120310634 29948
## - availability_30         1    504716 120352287 29949
## - review_scores_rating    1    556903 120404474 29950
## - accommodates            1    572343 120419914 29951
## - bathrooms               1   1882700 121730271 29981
## - neighbourhood_cleansed 12   3261100 123108671 29990
## 
## Step:  AIC=29938.4
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + review_scores_rating
## 
##                          Df Sum of Sq       RSS   AIC
## - host_response_rate      1     54963 119954439 29938
## - host_acceptance_rate    1     58087 119957563 29938
## <none>                                119899476 29938
## - instant_bookable        1    105394 120004870 29939
## - host_is_superhost       1    134378 120033853 29940
## - host_identity_verified  1    317719 120217195 29944
## - bedrooms                1    473114 120372589 29947
## - availability_30         1    514875 120414350 29948
## - accommodates            1    537790 120437266 29949
## - review_scores_rating    1    541879 120441355 29949
## - minimum_nights          1    551125 120450601 29949
## - bathrooms               1   1975566 121875042 29982
## - neighbourhood_cleansed 12   3231552 123131028 29989
## 
## Step:  AIC=29937.69
## price ~ host_is_superhost + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + minimum_nights + 
##     instant_bookable + host_identity_verified + availability_30 + 
##     review_scores_rating
## 
##                          Df Sum of Sq       RSS   AIC
## - host_acceptance_rate    1     28616 119983055 29936
## <none>                                119954439 29938
## - instant_bookable        1    105085 120059524 29938
## - host_is_superhost       1    125459 120079898 29939
## - host_identity_verified  1    322331 120276770 29943
## - bedrooms                1    466805 120421244 29947
## - review_scores_rating    1    533675 120488114 29948
## - accommodates            1    538126 120492565 29948
## - availability_30         1    542662 120497101 29948
## - minimum_nights          1    543823 120498262 29948
## - bathrooms               1   1983143 121937582 29982
## - neighbourhood_cleansed 12   3246192 123200631 29989
## 
## Step:  AIC=29936.36
## price ~ host_is_superhost + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating
## 
##                          Df Sum of Sq       RSS   AIC
## <none>                                119983055 29936
## - instant_bookable        1     86666 120069721 29936
## - host_is_superhost       1    158117 120141172 29938
## - host_identity_verified  1    326593 120309648 29942
## - bedrooms                1    459689 120442744 29945
## - minimum_nights          1    515214 120498269 29946
## - availability_30         1    535040 120518095 29947
## - review_scores_rating    1    535662 120518717 29947
## - accommodates            1    572401 120555456 29948
## - bathrooms               1   1959822 121942877 29980
## - neighbourhood_cleansed 12   3221564 123204619 29987
# Coefficient table and fit statistics for the model returned by the
# stepwise selection
step_model2 %>% summary()
## 
## Call:
## lm(formula = price ~ host_is_superhost + accommodates + bathrooms + 
##     bedrooms + neighbourhood_cleansed + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating, 
##     data = air_clean_sig)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -589.8  -55.4  -13.4   29.0 8408.0 
## 
## Coefficients:
##                                            Estimate Std. Error t value
## (Intercept)                               -102.0570    51.7084  -1.974
## host_is_superhostt                          15.8017     8.2542   1.914
## accommodates                                13.4856     3.7024   3.642
## bathrooms                                   67.6098    10.0314   6.740
## bedrooms                                    30.9713     9.4883   3.264
## neighbourhood_cleansedDowntown Eastside    -17.4988    15.8407  -1.105
## neighbourhood_cleansedHastings-Sunrise     -73.2112    18.5153  -3.954
## neighbourhood_cleansedKerrisdale           -74.8435    30.5068  -2.453
## neighbourhood_cleansedKillarney            -92.2314    30.6138  -3.013
## neighbourhood_cleansedKitsilano              9.9534    13.8034   0.721
## neighbourhood_cleansedMarpole              -81.2461    22.6167  -3.592
## neighbourhood_cleansedMount Pleasant        15.9457    15.1700   1.051
## neighbourhood_cleansedOakridge             -14.6127    27.6033  -0.529
## neighbourhood_cleansedRenfrew-Collingwood  -66.9938    17.5390  -3.820
## neighbourhood_cleansedSunset               -70.4233    24.1900  -2.911
## neighbourhood_cleansedVictoria-Fraserview -107.5467    26.3020  -4.089
## neighbourhood_cleansedWest End              -0.2730    14.1337  -0.019
## minimum_nights                               0.5869     0.1698   3.456
## instant_bookablet                          -13.1850     9.3028  -1.417
## host_identity_verifiedt                    -64.4407    23.4216  -2.751
## availability_30                              1.4743     0.4186   3.522
## review_scores_rating                        33.1591     9.4106   3.524
##                                                  Pr(>|t|)    
## (Intercept)                                      0.048514 *  
## host_is_superhostt                               0.055673 .  
## accommodates                                     0.000275 ***
## bathrooms                                 0.0000000000192 ***
## bedrooms                                         0.001111 ** 
## neighbourhood_cleansedDowntown Eastside          0.269396    
## neighbourhood_cleansedHastings-Sunrise    0.0000787425421 ***
## neighbourhood_cleansedKerrisdale                 0.014215 *  
## neighbourhood_cleansedKillarney                  0.002612 ** 
## neighbourhood_cleansedKitsilano                  0.470921    
## neighbourhood_cleansedMarpole                    0.000333 ***
## neighbourhood_cleansedMount Pleasant             0.293288    
## neighbourhood_cleansedOakridge                   0.596583    
## neighbourhood_cleansedRenfrew-Collingwood        0.000137 ***
## neighbourhood_cleansedSunset                     0.003628 ** 
## neighbourhood_cleansedVictoria-Fraserview 0.0000445670787 ***
## neighbourhood_cleansedWest End                   0.984590    
## minimum_nights                                   0.000557 ***
## instant_bookablet                                0.156503    
## host_identity_verifiedt                          0.005974 ** 
## availability_30                                  0.000436 ***
## review_scores_rating                             0.000433 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 207.7 on 2781 degrees of freedom
## Multiple R-squared:  0.1718, Adjusted R-squared:  0.1655 
## F-statistic: 27.47 on 21 and 2781 DF,  p-value: < 0.00000000000000022
# Exploratory plots of price against listing characteristics in air_clean_sig.
# Columns are referenced by bare name inside aes() and facet_wrap() so that
# ggplot2 resolves them in the plot data rather than capturing whole vectors
# through `$` (which ggplot2 discourages).

# Price vs. accommodates, one panel per bedroom count
p <- ggplot(air_clean_sig, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(p + labs(
  title = "Price compared to accommodation size split by bedrooms",
  y = "Price", x = "Accommodates"
))

# Price vs. accommodates, one panel per bathroom count, with a boxplot per
# accommodates value drawn over the points
p <- ggplot(air_clean_sig, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms) +
  geom_boxplot(aes(group = accommodates))
print(p + labs(
  title = "Price compared to accommodation size split by bathrooms",
  y = "Price", x = "Accommodates"
))

# Distribution of price for each bathroom count
p <- ggplot(air_clean_sig, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms))
print(p + labs(title = "Price compared to bathrooms", y = "Price", x = "Bathrooms"))

# Price vs. number of reviews
p <- ggplot(air_clean_sig, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(p + labs(
  title = "Price compared to the number of reviews",
  y = "Price", x = "Number of reviews"
))

In this code I transformed the data by removing extreme outliers from it.

# Remove rows whose value in one column lies outside IQR-based fences.
#
# Arguments:
#   data        Data frame (or tibble) to filter.
#   variable    Name of the column to screen, given as a single string.
#   lower_mult  IQR multiplier for the lower fence (default 1.5).
#   upper_mult  IQR multiplier for the upper fence (default 4.5, so with the
#               defaults the upper fence is wider than the lower one).
#
# Returns:
#   `data` restricted to the rows satisfying
#     q1 - lower_mult * IQR <= value <= q3 + upper_mult * IQR
#   where q1 / q3 are the 25% / 75% quantiles of the column. Missing values
#   are ignored when computing the quantiles, and rows whose value is missing
#   are kept (a missing value is not treated as an outlier). The result is
#   always a data frame, even when `data` has a single column.
remove_outliers <- function(data, variable,
                            lower_mult = 1.5, upper_mult = 4.5) {
  # Fail loudly on a wrong column name instead of silently returning 0 rows
  stopifnot(
    is.data.frame(data),
    is.character(variable),
    length(variable) == 1L,
    variable %in% names(data)
  )

  values <- data[[variable]]

  # na.rm = TRUE: quantile() stops with an error on NA input otherwise
  quartiles <- quantile(
    values,
    probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE
  )
  iqr <- quartiles[2] - quartiles[1]
  lower_bound <- quartiles[1] - lower_mult * iqr
  upper_bound <- quartiles[2] + upper_mult * iqr

  # Build an NA-free mask: an NA in a logical row index would add all-NA rows
  keep <- is.na(values) | (values >= lower_bound & values <= upper_bound)

  # drop = FALSE stops a one-column data frame collapsing to a bare vector
  data[keep, , drop = FALSE]
}

# Remove outliers from the 'price' variable in air_clean
air_clean_filtered <- remove_outliers(air_clean, "price")

# The plots below repeat the earlier exploratory plots on the filtered data.
# Columns are referenced by bare name inside aes() and facet_wrap() so that
# ggplot2 resolves them in the plot data rather than capturing whole vectors
# through `$` (which ggplot2 discourages).

# Price vs. accommodates, one panel per bedroom count
p <- ggplot(air_clean_filtered, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(p + labs(
  title = "Price compared to accommodation size split by bedrooms (Model 3)",
  y = "Price", x = "Accommodates"
))

# Price vs. accommodates, one panel per bathroom count.
# The boxplot overlay is intentionally left disabled here:
#   + geom_boxplot(aes(group = accommodates))
p <- ggplot(air_clean_filtered, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms)
print(p + labs(
  title = "Price compared to accommodation size split by bathrooms (Model 3)",
  y = "Price", x = "Accommodates"
))

# Distribution of price for each bathroom count, with a smoothed trend line
# (geom_smooth() picks its own default method and reports it as a message)
p <- ggplot(air_clean_filtered, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms)) +
  geom_smooth()
print(p + labs(
  title = "Price compared to bathrooms (Model 3)",
  y = "Price", x = "Bathrooms"
))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Price vs. number of reviews on the outlier-filtered data. Columns are given
# by bare name so ggplot2 resolves them in air_clean_filtered rather than
# through `$`.
p <- ggplot(air_clean_filtered, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(p + labs(
  title = "Price compared to the number of reviews (Model 3)",
  y = "Price", x = "Number of reviews"
))

This model has the data without outliers and with all neighbourhoods.

# Full linear model of price on every candidate predictor, fitted to the
# outlier-filtered listings (all neighbourhoods). The formula is written
# inline so the model's recorded call shows the complete term list.
updated_model <- lm(
  price ~ host_is_superhost + host_response_time + host_response_rate +
    host_acceptance_rate + accommodates + bathrooms + bedrooms +
    neighbourhood_cleansed + host_listings_count + minimum_nights +
    maximum_nights + instant_bookable + host_identity_verified +
    availability_30 + availability_60 + availability_90 +
    review_scores_rating + reviews_per_month,
  data = air_clean_filtered
)

# Coefficient table and fit statistics for the full model
summary(updated_model)
## 
## Call:
## lm(formula = price ~ host_is_superhost + host_response_time + 
##     host_response_rate + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + host_listings_count + 
##     minimum_nights + maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month, data = air_clean_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -411.17  -41.58  -10.67   26.03  531.67 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                    -57.983961  22.174989  -2.615
## host_is_superhostt                              13.947462   2.499798   5.579
## host_response_timewithin a day                 -18.781762  19.987237  -0.940
## host_response_timewithin a few hours           -17.400411  21.185949  -0.821
## host_response_timewithin an hour               -16.127042  21.405478  -0.753
## host_response_rate                             -23.410321  21.871553  -1.070
## host_acceptance_rate                            14.932259   8.179617   1.826
## accommodates                                    10.940097   1.109111   9.864
## bathrooms                                       31.063010   2.892050  10.741
## bedrooms                                        37.056934   2.801245  13.229
## neighbourhood_cleansedDowntown                  40.631438  10.220205   3.976
## neighbourhood_cleansedDowntown Eastside         26.259151  11.105902   2.364
## neighbourhood_cleansedDunbar Southlands        -12.188136  12.079041  -1.009
## neighbourhood_cleansedFairview                  24.229962  12.461023   1.944
## neighbourhood_cleansedGrandview-Woodland        -3.173455  11.606267  -0.273
## neighbourhood_cleansedHastings-Sunrise         -31.746715  11.503076  -2.760
## neighbourhood_cleansedKensington-Cedar Cottage -18.842178  11.002814  -1.712
## neighbourhood_cleansedKerrisdale               -48.170640  14.177506  -3.398
## neighbourhood_cleansedKillarney                -49.508179  14.074920  -3.517
## neighbourhood_cleansedKitsilano                 42.100907  10.765304   3.911
## neighbourhood_cleansedMarpole                  -36.050017  12.319479  -2.926
## neighbourhood_cleansedMount Pleasant            18.524095  10.988627   1.686
## neighbourhood_cleansedOakridge                 -32.931088  13.556107  -2.429
## neighbourhood_cleansedRenfrew-Collingwood      -31.604184  11.356458  -2.783
## neighbourhood_cleansedRiley Park                -3.728424  11.090988  -0.336
## neighbourhood_cleansedShaughnessy              -14.756341  13.514720  -1.092
## neighbourhood_cleansedSouth Cambie              -2.487114  13.957394  -0.178
## neighbourhood_cleansedStrathcona                19.861608  17.378926   1.143
## neighbourhood_cleansedSunset                   -32.298030  12.669553  -2.549
## neighbourhood_cleansedVictoria-Fraserview      -59.136488  13.048136  -4.532
## neighbourhood_cleansedWest End                  38.452235  10.887924   3.532
## neighbourhood_cleansedWest Point Grey           17.801929  14.160330   1.257
## host_listings_count                              0.117750   0.037089   3.175
## minimum_nights                                  -0.623033   0.059016 -10.557
## maximum_nights                                  -0.002371   0.002463  -0.963
## instant_bookablet                               -5.350815   2.800213  -1.911
## host_identity_verifiedt                         -3.991732   6.492820  -0.615
## availability_30                                  1.159694   0.285848   4.057
## availability_60                                 -0.474867   0.280380  -1.694
## availability_90                                  0.211564   0.140628   1.504
## review_scores_rating                            24.125235   2.879938   8.377
## reviews_per_month                               -5.242094   0.762804  -6.872
##                                                            Pr(>|t|)    
## (Intercept)                                                0.008963 ** 
## host_is_superhostt                                 0.00000002584819 ***
## host_response_timewithin a day                             0.347439    
## host_response_timewithin a few hours                       0.411517    
## host_response_timewithin an hour                           0.451253    
## host_response_rate                                         0.284529    
## host_acceptance_rate                                       0.067999 .  
## accommodates                                   < 0.0000000000000002 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                       < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown                     0.00007153399552 ***
## neighbourhood_cleansedDowntown Eastside                    0.018109 *  
## neighbourhood_cleansedDunbar Southlands                    0.313025    
## neighbourhood_cleansedFairview                             0.051915 .  
## neighbourhood_cleansedGrandview-Woodland                   0.784541    
## neighbourhood_cleansedHastings-Sunrise                     0.005811 ** 
## neighbourhood_cleansedKensington-Cedar Cottage             0.086890 .  
## neighbourhood_cleansedKerrisdale                           0.000687 ***
## neighbourhood_cleansedKillarney                            0.000441 ***
## neighbourhood_cleansedKitsilano                    0.00009363165656 ***
## neighbourhood_cleansedMarpole                              0.003451 ** 
## neighbourhood_cleansedMount Pleasant                       0.091927 .  
## neighbourhood_cleansedOakridge                             0.015177 *  
## neighbourhood_cleansedRenfrew-Collingwood                  0.005414 ** 
## neighbourhood_cleansedRiley Park                           0.736764    
## neighbourhood_cleansedShaughnessy                          0.274960    
## neighbourhood_cleansedSouth Cambie                         0.858581    
## neighbourhood_cleansedStrathcona                           0.253172    
## neighbourhood_cleansedSunset                               0.010835 *  
## neighbourhood_cleansedVictoria-Fraserview          0.00000602112568 ***
## neighbourhood_cleansedWest End                             0.000418 ***
## neighbourhood_cleansedWest Point Grey                      0.208771    
## host_listings_count                                        0.001512 ** 
## minimum_nights                                 < 0.0000000000000002 ***
## maximum_nights                                             0.335804    
## instant_bookablet                                          0.056099 .  
## host_identity_verifiedt                                    0.538730    
## availability_30                                    0.00005071876326 ***
## availability_60                                            0.090414 .  
## availability_90                                            0.132558    
## review_scores_rating                           < 0.0000000000000002 ***
## reviews_per_month                                  0.00000000000738 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.47 on 3725 degrees of freedom
## Multiple R-squared:  0.5254, Adjusted R-squared:  0.5202 
## F-statistic: 100.6 on 41 and 3725 DF,  p-value: < 0.00000000000000022
# Perform stepwise selection on the updated model.
# step() prints one AIC table per step (the trace that follows); each table
# lists the candidate term removals alongside the "<none>" option. The model
# it finally settles on is stored in step_model3.
step_model3 <- step(updated_model)
## Start:  AIC=31992.58
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - host_response_time      3      6420 17983157 31988
## - host_identity_verified  1      1824 17978561 31991
## - maximum_nights          1      4472 17981209 31992
## - host_response_rate      1      5529 17982266 31992
## <none>                                17976737 31993
## - availability_90         1     10923 17987659 31993
## - availability_60         1     13843 17990580 31993
## - host_acceptance_rate    1     16083 17992820 31994
## - instant_bookable        1     17621 17994358 31994
## - host_listings_count     1     48643 18025379 32001
## - availability_30         1     79433 18056170 32007
## - host_is_superhost       1    150233 18126970 32022
## - reviews_per_month       1    227912 18204649 32038
## - review_scores_rating    1    338658 18315395 32061
## - accommodates            1    469544 18446281 32088
## - minimum_nights          1    537856 18514593 32102
## - bathrooms               1    556750 18533486 32105
## - bedrooms                1    844542 18821279 32164
## - neighbourhood_cleansed 22   3320006 21296743 32587
## 
## Step:  AIC=31987.93
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - host_identity_verified  1      1943 17985100 31986
## - maximum_nights          1      4564 17987721 31987
## <none>                                17983157 31988
## - availability_90         1     11211 17994368 31988
## - availability_60         1     13867 17997024 31989
## - instant_bookable        1     16118 17999275 31989
## - host_acceptance_rate    1     20384 18003541 31990
## - host_response_rate      1     42187 18025344 31995
## - host_listings_count     1     49403 18032560 31996
## - availability_30         1     77876 18061033 32002
## - host_is_superhost       1    154372 18137529 32018
## - reviews_per_month       1    227036 18210193 32033
## - review_scores_rating    1    343863 18327020 32057
## - accommodates            1    472461 18455618 32084
## - minimum_nights          1    545617 18528774 32099
## - bathrooms               1    555655 18538812 32101
## - bedrooms                1    844522 18827679 32159
## - neighbourhood_cleansed 22   3315671 21298828 32581
## 
## Step:  AIC=31986.33
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - maximum_nights          1      4722 17989822 31985
## <none>                                17985100 31986
## - availability_90         1     11174 17996274 31987
## - availability_60         1     13967 17999067 31987
## - instant_bookable        1     15927 18001028 31988
## - host_acceptance_rate    1     20649 18005749 31989
## - host_response_rate      1     42567 18027668 31993
## - host_listings_count     1     48613 18033713 31995
## - availability_30         1     78304 18063404 32001
## - host_is_superhost       1    152620 18137720 32016
## - reviews_per_month       1    227711 18212811 32032
## - review_scores_rating    1    344806 18329906 32056
## - accommodates            1    472261 18457361 32082
## - minimum_nights          1    545159 18530259 32097
## - bathrooms               1    556147 18541247 32099
## - bedrooms                1    844428 18829528 32157
## - neighbourhood_cleansed 22   3313948 21299048 32579
## 
## Step:  AIC=31985.32
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## <none>                                17989822 31985
## - availability_90         1     10742 18000564 31986
## - availability_60         1     13664 18003486 31986
## - instant_bookable        1     15478 18005300 31987
## - host_acceptance_rate    1     20382 18010204 31988
## - host_response_rate      1     42659 18032482 31992
## - host_listings_count     1     48652 18038474 31993
## - availability_30         1     78937 18068759 32000
## - host_is_superhost       1    150090 18139912 32015
## - reviews_per_month       1    224736 18214558 32030
## - review_scores_rating    1    349248 18339070 32056
## - accommodates            1    471305 18461127 32081
## - bathrooms               1    557933 18547756 32098
## - minimum_nights          1    560158 18549980 32099
## - bedrooms                1    842009 18831832 32156
## - neighbourhood_cleansed 22   3310635 21300457 32578
# Coefficient table and fit statistics for the model returned by the
# stepwise selection on the outlier-filtered data
step_model3 %>% summary()
## 
## Call:
## lm(formula = price ~ host_is_superhost + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     instant_bookable + availability_30 + availability_60 + availability_90 + 
##     review_scores_rating + reviews_per_month, data = air_clean_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -409.80  -41.29  -10.55   25.80  530.25 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    -69.21271   20.63659  -3.354
## host_is_superhostt                              13.85318    2.48332   5.578
## host_response_rate                             -36.38087   12.23279  -2.974
## host_acceptance_rate                            16.20087    7.88086   2.056
## accommodates                                    10.94905    1.10760   9.885
## bathrooms                                       31.08663    2.89029  10.756
## bedrooms                                        36.98650    2.79926  13.213
## neighbourhood_cleansedDowntown                  40.47065   10.21452   3.962
## neighbourhood_cleansedDowntown Eastside         26.15010   11.10106   2.356
## neighbourhood_cleansedDunbar Southlands        -11.68747   12.06476  -0.969
## neighbourhood_cleansedFairview                  23.79987   12.44651   1.912
## neighbourhood_cleansedGrandview-Woodland        -2.72451   11.59912  -0.235
## neighbourhood_cleansedHastings-Sunrise         -31.74231   11.49686  -2.761
## neighbourhood_cleansedKensington-Cedar Cottage -18.53389   10.99567  -1.686
## neighbourhood_cleansedKerrisdale               -48.28388   14.17200  -3.407
## neighbourhood_cleansedKillarney                -49.23182   14.06697  -3.500
## neighbourhood_cleansedKitsilano                 42.07129   10.76013   3.910
## neighbourhood_cleansedMarpole                  -36.48172   12.31180  -2.963
## neighbourhood_cleansedMount Pleasant            18.55323   10.98336   1.689
## neighbourhood_cleansedOakridge                 -33.29184   13.52923  -2.461
## neighbourhood_cleansedRenfrew-Collingwood      -31.23351   11.34767  -2.752
## neighbourhood_cleansedRiley Park                -3.55074   11.08524  -0.320
## neighbourhood_cleansedShaughnessy              -13.96721   13.47064  -1.037
## neighbourhood_cleansedSouth Cambie              -2.49874   13.94926  -0.179
## neighbourhood_cleansedStrathcona                19.79825   17.36209   1.140
## neighbourhood_cleansedSunset                   -32.03609   12.66234  -2.530
## neighbourhood_cleansedVictoria-Fraserview      -58.74530   13.04185  -4.504
## neighbourhood_cleansedWest End                  38.20544   10.88267   3.511
## neighbourhood_cleansedWest Point Grey           18.03039   14.15079   1.274
## host_listings_count                              0.11741    0.03697   3.176
## minimum_nights                                  -0.63169    0.05862 -10.777
## instant_bookablet                               -4.97598    2.77767  -1.791
## availability_30                                  1.15493    0.28548   4.046
## availability_60                                 -0.47166    0.28022  -1.683
## availability_90                                  0.20954    0.14040   1.492
## review_scores_rating                            24.36282    2.86299   8.510
## reviews_per_month                               -5.13073    0.75163  -6.826
##                                                            Pr(>|t|)    
## (Intercept)                                                0.000805 ***
## host_is_superhostt                                  0.0000000259865 ***
## host_response_rate                                         0.002958 ** 
## host_acceptance_rate                                       0.039879 *  
## accommodates                                   < 0.0000000000000002 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                       < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown                      0.0000756915373 ***
## neighbourhood_cleansedDowntown Eastside                    0.018542 *  
## neighbourhood_cleansedDunbar Southlands                    0.332744    
## neighbourhood_cleansedFairview                             0.055931 .  
## neighbourhood_cleansedGrandview-Woodland                   0.814308    
## neighbourhood_cleansedHastings-Sunrise                     0.005791 ** 
## neighbourhood_cleansedKensington-Cedar Cottage             0.091964 .  
## neighbourhood_cleansedKerrisdale                           0.000664 ***
## neighbourhood_cleansedKillarney                            0.000471 ***
## neighbourhood_cleansedKitsilano                     0.0000939666432 ***
## neighbourhood_cleansedMarpole                              0.003064 ** 
## neighbourhood_cleansedMount Pleasant                       0.091262 .  
## neighbourhood_cleansedOakridge                             0.013910 *  
## neighbourhood_cleansedRenfrew-Collingwood                  0.005944 ** 
## neighbourhood_cleansedRiley Park                           0.748749    
## neighbourhood_cleansedShaughnessy                          0.299867    
## neighbourhood_cleansedSouth Cambie                         0.857845    
## neighbourhood_cleansedStrathcona                           0.254228    
## neighbourhood_cleansedSunset                               0.011446 *  
## neighbourhood_cleansedVictoria-Fraserview           0.0000068608695 ***
## neighbourhood_cleansedWest End                             0.000452 ***
## neighbourhood_cleansedWest Point Grey                      0.202685    
## host_listings_count                                        0.001505 ** 
## minimum_nights                                 < 0.0000000000000002 ***
## instant_bookablet                                          0.073307 .  
## availability_30                                     0.0000532489686 ***
## availability_60                                            0.092430 .  
## availability_90                                            0.135684    
## review_scores_rating                           < 0.0000000000000002 ***
## reviews_per_month                                   0.0000000000101 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.45 on 3730 degrees of freedom
## Multiple R-squared:  0.5251, Adjusted R-squared:  0.5205 
## F-statistic: 114.6 on 36 and 3730 DF,  p-value: < 0.00000000000000022

This model uses the data with the outliers removed and only the significant neighbourhoods.

# Restrict the outlier-filtered listings to the neighbourhoods named in
# significant_neighborhoods
air_clean_sig2 <- filter(
  air_clean_filtered,
  neighbourhood_cleansed %in% significant_neighborhoods
)

# Refit the full predictor set on the restricted listings. The formula is
# written inline so the model's recorded call shows the complete term list.
lm_model_significant_neighborhoods2 <- lm(
  price ~ host_is_superhost + host_response_time + host_response_rate +
    host_acceptance_rate + accommodates + bathrooms + bedrooms +
    neighbourhood_cleansed + host_listings_count + minimum_nights +
    maximum_nights + instant_bookable + host_identity_verified +
    availability_30 + availability_60 + availability_90 +
    review_scores_rating + reviews_per_month,
  data = air_clean_sig2
)

# Coefficient table and fit statistics for the restricted-neighbourhood model
summary(lm_model_significant_neighborhoods2)
## 
## Call:
## lm(formula = price ~ host_is_superhost + host_response_time + 
##     host_response_rate + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + host_listings_count + 
##     minimum_nights + maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month, data = air_clean_sig2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -388.50  -41.57  -10.17   25.36  529.06 
## 
## Coefficients:
##                                             Estimate Std. Error t value
## (Intercept)                               -33.494926  22.484505  -1.490
## host_is_superhostt                         16.486253   2.898294   5.688
## host_response_timewithin a day             -6.955686  22.554585  -0.308
## host_response_timewithin a few hours      -12.246510  23.949606  -0.511
## host_response_timewithin an hour           -6.802423  24.230589  -0.281
## host_response_rate                        -22.265309  24.067020  -0.925
## host_acceptance_rate                       15.367224   9.896277   1.553
## accommodates                               10.368207   1.268506   8.174
## bathrooms                                  32.493244   3.477076   9.345
## bedrooms                                   31.953386   3.253569   9.821
## neighbourhood_cleansedDowntown Eastside   -14.416014   5.324429  -2.708
## neighbourhood_cleansedHastings-Sunrise    -70.085293   6.225902 -11.257
## neighbourhood_cleansedKerrisdale          -84.529912  10.403238  -8.125
## neighbourhood_cleansedKillarney           -86.299194  10.261253  -8.410
## neighbourhood_cleansedKitsilano             4.753028   4.725043   1.006
## neighbourhood_cleansedMarpole             -75.113139   7.578996  -9.911
## neighbourhood_cleansedMount Pleasant      -20.111173   5.126506  -3.923
## neighbourhood_cleansedOakridge            -69.532567   9.528276  -7.297
## neighbourhood_cleansedRenfrew-Collingwood -69.836883   5.926396 -11.784
## neighbourhood_cleansedSunset              -69.170735   8.170675  -8.466
## neighbourhood_cleansedVictoria-Fraserview -96.241632   8.817099 -10.915
## neighbourhood_cleansedWest End             -1.838209   4.817536  -0.382
## host_listings_count                         0.147620   0.038198   3.865
## minimum_nights                             -0.524097   0.065133  -8.047
## maximum_nights                             -0.001959   0.002901  -0.675
## instant_bookablet                          -6.000667   3.245169  -1.849
## host_identity_verifiedt                    -0.496510   7.941801  -0.063
## availability_30                             0.961190   0.328615   2.925
## availability_60                            -0.224764   0.318781  -0.705
## availability_90                             0.199924   0.160358   1.247
## review_scores_rating                       24.092014   3.174877   7.588
## reviews_per_month                          -3.820635   0.858622  -4.450
##                                                       Pr(>|t|)    
## (Intercept)                                           0.136421    
## host_is_superhostt                        0.000000014191313129 ***
## host_response_timewithin a day                        0.757806    
## host_response_timewithin a few hours                  0.609151    
## host_response_timewithin an hour                      0.778933    
## host_response_rate                                    0.354976    
## host_acceptance_rate                                  0.120579    
## accommodates                              0.000000000000000452 ***
## bathrooms                                 < 0.0000000000000002 ***
## bedrooms                                  < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown Eastside               0.006821 ** 
## neighbourhood_cleansedHastings-Sunrise    < 0.0000000000000002 ***
## neighbourhood_cleansedKerrisdale          0.000000000000000668 ***
## neighbourhood_cleansedKillarney           < 0.0000000000000002 ***
## neighbourhood_cleansedKitsilano                       0.314542    
## neighbourhood_cleansedMarpole             < 0.0000000000000002 ***
## neighbourhood_cleansedMount Pleasant      0.000089608773389718 ***
## neighbourhood_cleansedOakridge            0.000000000000382087 ***
## neighbourhood_cleansedRenfrew-Collingwood < 0.0000000000000002 ***
## neighbourhood_cleansedSunset              < 0.0000000000000002 ***
## neighbourhood_cleansedVictoria-Fraserview < 0.0000000000000002 ***
## neighbourhood_cleansedWest End                        0.702813    
## host_listings_count                                   0.000114 ***
## minimum_nights                            0.000000000000001255 ***
## maximum_nights                                        0.499538    
## instant_bookablet                                     0.064550 .  
## host_identity_verifiedt                               0.950154    
## availability_30                                       0.003473 ** 
## availability_60                                       0.480825    
## availability_90                                       0.212599    
## review_scores_rating                      0.000000000000044119 ***
## reviews_per_month                         0.000008940876595746 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.37 on 2741 degrees of freedom
## Multiple R-squared:  0.4962, Adjusted R-squared:  0.4905 
## F-statistic: 87.08 on 31 and 2741 DF,  p-value: < 0.00000000000000022
# Perform stepwise selection on the model.
# No `scope` or `direction` is supplied, so step() uses its default backward
# elimination: at each step it drops the single term whose removal lowers AIC
# the most, and stops when no removal improves AIC. The trace is printed.
step_model4 <- step(lm_model_significant_neighborhoods2)
## Start:  AIC=23543.67
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - host_response_time      3     10095 13199573 23540
## - host_identity_verified  1        19 13189497 23542
## - maximum_nights          1      2194 13191673 23542
## - availability_60         1      2392 13191870 23542
## - host_response_rate      1      4118 13193597 23543
## - availability_90         1      7479 13196958 23543
## <none>                                13189478 23544
## - host_acceptance_rate    1     11603 13201081 23544
## - instant_bookable        1     16453 13205931 23545
## - availability_30         1     41168 13230646 23550
## - host_listings_count     1     71865 13261343 23557
## - reviews_per_month       1     95276 13284755 23562
## - host_is_superhost       1    155696 13345174 23574
## - review_scores_rating    1    277084 13466562 23599
## - minimum_nights          1    311558 13501036 23606
## - accommodates            1    321470 13510948 23608
## - bathrooms               1    420220 13609698 23629
## - bedrooms                1    464122 13653600 23638
## - neighbourhood_cleansed 12   2709338 15898816 24038
## 
## Step:  AIC=23539.79
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - host_identity_verified  1        53 13199626 23538
## - maximum_nights          1      1720 13201293 23538
## - availability_60         1      2402 13201975 23538
## - availability_90         1      7846 13207419 23539
## <none>                                13199573 23540
## - instant_bookable        1     13880 13213453 23541
## - host_acceptance_rate    1     17336 13216909 23541
## - host_response_rate      1     22322 13221895 23543
## - availability_30         1     40689 13240262 23546
## - host_listings_count     1     69024 13268598 23552
## - reviews_per_month       1     90398 13289971 23557
## - host_is_superhost       1    160846 13360419 23571
## - review_scores_rating    1    277091 13476664 23595
## - minimum_nights          1    319454 13519027 23604
## - accommodates            1    328047 13527620 23606
## - bathrooms               1    421017 13620590 23625
## - bedrooms                1    464058 13663632 23634
## - neighbourhood_cleansed 12   2718313 15917886 24035
## 
## Step:  AIC=23537.8
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - maximum_nights          1      1733 13201359 23536
## - availability_60         1      2402 13202028 23536
## - availability_90         1      7837 13207463 23537
## <none>                                13199626 23538
## - instant_bookable        1     13889 13213515 23539
## - host_acceptance_rate    1     17381 13217008 23540
## - host_response_rate      1     22365 13221991 23541
## - availability_30         1     40734 13240360 23544
## - host_listings_count     1     68993 13268620 23550
## - reviews_per_month       1     90500 13290126 23555
## - host_is_superhost       1    161122 13360748 23569
## - review_scores_rating    1    277150 13476776 23593
## - minimum_nights          1    319411 13519038 23602
## - accommodates            1    328033 13527660 23604
## - bathrooms               1    421005 13620631 23623
## - bedrooms                1    464057 13663684 23632
## - neighbourhood_cleansed 12   2719935 15919562 24033
## 
## Step:  AIC=23536.17
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - availability_60         1      2243 13203602 23535
## - availability_90         1      7516 13208875 23536
## <none>                                13201359 23536
## - instant_bookable        1     13450 13214809 23537
## - host_acceptance_rate    1     17356 13218715 23538
## - host_response_rate      1     22353 13223712 23539
## - availability_30         1     40730 13242089 23543
## - host_listings_count     1     68831 13270190 23549
## - reviews_per_month       1     89178 13290537 23553
## - host_is_superhost       1    159619 13360978 23568
## - review_scores_rating    1    279909 13481268 23592
## - minimum_nights          1    325185 13526544 23602
## - accommodates            1    328369 13529728 23602
## - bathrooms               1    421624 13622983 23621
## - bedrooms                1    462455 13663814 23630
## - neighbourhood_cleansed 12   2718799 15920158 24032
## 
## Step:  AIC=23534.64
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + availability_90 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## - availability_90         1      8900 13212502 23535
## <none>                                13203602 23535
## - instant_bookable        1     13551 13217152 23536
## - host_acceptance_rate    1     17347 13220949 23536
## - host_response_rate      1     22536 13226138 23537
## - availability_30         1     60512 13264113 23545
## - host_listings_count     1     69659 13273261 23547
## - reviews_per_month       1     90517 13294119 23552
## - host_is_superhost       1    158177 13361779 23566
## - review_scores_rating    1    280634 13484235 23591
## - minimum_nights          1    323693 13527295 23600
## - accommodates            1    328351 13531953 23601
## - bathrooms               1    421044 13624646 23620
## - bedrooms                1    461902 13665503 23628
## - neighbourhood_cleansed 12   2716602 15920203 24030
## 
## Step:  AIC=23534.51
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df Sum of Sq      RSS   AIC
## <none>                                13212502 23535
## - instant_bookable        1     12753 13225255 23535
## - host_acceptance_rate    1     19811 13232312 23537
## - host_response_rate      1     23811 13236313 23538
## - host_listings_count     1     73465 13285967 23548
## - reviews_per_month       1     88623 13301125 23551
## - host_is_superhost       1    154495 13366996 23565
## - availability_30         1    252119 13464620 23585
## - review_scores_rating    1    280320 13492821 23591
## - minimum_nights          1    321858 13534359 23599
## - accommodates            1    332452 13544954 23601
## - bathrooms               1    420075 13632577 23619
## - bedrooms                1    459982 13672484 23627
## - neighbourhood_cleansed 12   2714326 15926828 24029
# Summary of the model with stepwise selection
# (step_model4 is the final model returned by step(), i.e. the formula reached
# when no further term removal lowered AIC).
summary(step_model4)
## 
## Call:
## lm(formula = price ~ host_is_superhost + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     instant_bookable + availability_30 + review_scores_rating + 
##     reviews_per_month, data = air_clean_sig2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -393.23  -41.77  -10.48   25.41  525.80 
## 
## Coefficients:
##                                            Estimate Std. Error t value
## (Intercept)                               -36.89800   19.94690  -1.850
## host_is_superhostt                         16.26849    2.86995   5.669
## host_response_rate                        -30.43399   13.67577  -2.225
## host_acceptance_rate                       19.16596    9.44204   2.030
## accommodates                               10.52074    1.26522   8.315
## bathrooms                                  32.48178    3.47505   9.347
## bedrooms                                   31.76318    3.24741   9.781
## neighbourhood_cleansedDowntown Eastside   -14.81689    5.31292  -2.789
## neighbourhood_cleansedHastings-Sunrise    -70.45222    6.20485 -11.354
## neighbourhood_cleansedKerrisdale          -85.34523   10.38384  -8.219
## neighbourhood_cleansedKillarney           -85.64904   10.24327  -8.361
## neighbourhood_cleansedKitsilano             4.23298    4.69045   0.902
## neighbourhood_cleansedMarpole             -75.16791    7.56539  -9.936
## neighbourhood_cleansedMount Pleasant      -20.49215    5.11060  -4.010
## neighbourhood_cleansedOakridge            -70.54750    9.49403  -7.431
## neighbourhood_cleansedRenfrew-Collingwood -69.46372    5.90801 -11.758
## neighbourhood_cleansedSunset              -69.02221    8.15015  -8.469
## neighbourhood_cleansedVictoria-Fraserview -95.93951    8.79977 -10.903
## neighbourhood_cleansedWest End             -1.92384    4.80865  -0.400
## host_listings_count                         0.14821    0.03792   3.909
## minimum_nights                             -0.52930    0.06469  -8.182
## instant_bookablet                          -5.23210    3.21259  -1.629
## availability_30                             1.02492    0.14154   7.241
## review_scores_rating                       24.07857    3.15347   7.636
## reviews_per_month                          -3.62517    0.84438  -4.293
##                                                       Pr(>|t|)    
## (Intercept)                                            0.06445 .  
## host_is_superhostt                        0.000000015898367023 ***
## host_response_rate                                     0.02614 *  
## host_acceptance_rate                                   0.04247 *  
## accommodates                              < 0.0000000000000002 ***
## bathrooms                                 < 0.0000000000000002 ***
## bedrooms                                  < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown Eastside                0.00533 ** 
## neighbourhood_cleansedHastings-Sunrise    < 0.0000000000000002 ***
## neighbourhood_cleansedKerrisdale          0.000000000000000312 ***
## neighbourhood_cleansedKillarney           < 0.0000000000000002 ***
## neighbourhood_cleansedKitsilano                        0.36689    
## neighbourhood_cleansedMarpole             < 0.0000000000000002 ***
## neighbourhood_cleansedMount Pleasant      0.000062405504155170 ***
## neighbourhood_cleansedOakridge            0.000000000000143419 ***
## neighbourhood_cleansedRenfrew-Collingwood < 0.0000000000000002 ***
## neighbourhood_cleansedSunset              < 0.0000000000000002 ***
## neighbourhood_cleansedVictoria-Fraserview < 0.0000000000000002 ***
## neighbourhood_cleansedWest End                         0.68913    
## host_listings_count                       0.000094953829834063 ***
## minimum_nights                            0.000000000000000423 ***
## instant_bookablet                                      0.10351    
## availability_30                           0.000000000000574160 ***
## review_scores_rating                      0.000000000000030812 ***
## reviews_per_month                         0.000018214648284954 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.34 on 2748 degrees of freedom
## Multiple R-squared:  0.4953, Adjusted R-squared:  0.4909 
## F-statistic: 112.4 on 24 and 2748 DF,  p-value: < 0.00000000000000022
# Scatter plot of price against accommodates, one panel per bedrooms value
# (Model 4 data). Columns are referenced by bare name so that aes() and
# facet_wrap() look them up in `data`; ggplot2 discourages `df$col` here.
p <- ggplot(air_clean_sig2, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bedrooms (Model 4)",
    y = "Price",
    x = "Accommodates"
  )
)

# Price against accommodates, one panel per bathrooms value (Model 4 data).
# Points are drawn first, then one boxplot per accommodates value on top.
# Columns are referenced by bare name so that aes() and facet_wrap() look
# them up in `data`; ggplot2 discourages `df$col` here.
p <- ggplot(air_clean_sig2, aes(x = accommodates, y = price)) +
  geom_point() +
  geom_boxplot(aes(group = accommodates)) +
  facet_wrap(~bathrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bathrooms (Model 4)",
    y = "Price",
    x = "Accommodates"
  )
)

# Distribution of price for each bathrooms value (one boxplot per value),
# overlaid with geom_smooth()'s default smoother across bathrooms.
# Columns are referenced by bare name so that aes() looks them up in `data`;
# ggplot2 discourages `df$col` here.
p <- ggplot(air_clean_sig2, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms)) +
  geom_smooth()
print(
  p + labs(
    title = "Price compared to bathrooms (Model 4)",
    y = "Price",
    x = "Bathrooms"
  )
)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Scatter plot of price against number_of_reviews (Model 4 data).
# Columns are referenced by bare name so that aes() looks them up in `data`;
# ggplot2 discourages `df$col` here.
p <- ggplot(air_clean_sig2, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(
  p + labs(
    title = "Price compared to the number of reviews (Model 4)",
    y = "Price",
    x = "Number of reviews"
  )
)

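This is the GLM (referred to below as the "logistic" model) fitted to the same data as the model 1 OLS. Because `glm()` is called without a `family` argument, it uses the default Gaussian family and is therefore a linear model, not a true logistic regression.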

# Fit a generalized linear model of price on all other columns of air_clean.
# NOTE(review): no `family` is passed, so glm() falls back to its default
# gaussian family; the summary below reports "Dispersion parameter for
# gaussian family". Despite the `log_model` name this is a linear model,
# not a logistic regression.
log_model <- glm(price ~ ., data = air_clean)
summary(log_model)
## 
## Call:
## glm(formula = price ~ ., data = air_clean)
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                    -67.172676  80.562109  -0.834
## host_is_superhostt                              14.762586   6.761144   2.183
## host_response_timewithin a day                 -40.830748  52.200620  -0.782
## host_response_timewithin a few hours           -20.547067  55.349773  -0.371
## host_response_timewithin an hour               -31.411719  55.969879  -0.561
## host_response_rate                             -19.676596  57.839236  -0.340
## host_acceptance_rate                            28.722659  21.927178   1.310
## accommodates                                    14.238930   2.961959   4.807
## bathrooms                                       62.326685   7.552568   8.252
## bedrooms                                        36.506232   7.421203   4.919
## neighbourhood_cleansedDowntown                  20.043396  26.950665   0.744
## neighbourhood_cleansedDowntown Eastside          3.105323  29.397390   0.106
## neighbourhood_cleansedDunbar Southlands        -33.022531  31.988861  -1.032
## neighbourhood_cleansedFairview                  -4.899654  33.135114  -0.148
## neighbourhood_cleansedGrandview-Woodland       -28.848818  30.797361  -0.937
## neighbourhood_cleansedHastings-Sunrise         -58.146619  30.505126  -1.906
## neighbourhood_cleansedKensington-Cedar Cottage -37.351262  29.061805  -1.285
## neighbourhood_cleansedKerrisdale               -60.913923  37.472184  -1.626
## neighbourhood_cleansedKillarney                -79.760936  37.600875  -2.121
## neighbourhood_cleansedKitsilano                 24.745238  28.403872   0.871
## neighbourhood_cleansedMarpole                  -64.902130  32.760190  -1.981
## neighbourhood_cleansedMount Pleasant            30.965973  29.051691   1.066
## neighbourhood_cleansedOakridge                  -4.477156  35.540171  -0.126
## neighbourhood_cleansedRenfrew-Collingwood      -51.634954  30.070913  -1.717
## neighbourhood_cleansedRiley Park               -21.213545  29.297222  -0.724
## neighbourhood_cleansedShaughnessy              -14.743869  35.741681  -0.413
## neighbourhood_cleansedSouth Cambie             -24.586941  37.278204  -0.660
## neighbourhood_cleansedStrathcona                -7.691959  46.654500  -0.165
## neighbourhood_cleansedSunset                   -56.185199  33.637982  -1.670
## neighbourhood_cleansedVictoria-Fraserview      -93.818094  34.781102  -2.697
## neighbourhood_cleansedWest End                  18.125489  28.792180   0.630
## neighbourhood_cleansedWest Point Grey           74.093006  36.696526   2.019
## host_listings_count                              0.032812   0.100400   0.327
## minimum_nights                                   0.229169   0.154295   1.485
## maximum_nights                                   0.002245   0.006700   0.335
## instant_bookablet                               -8.823629   7.585355  -1.163
## host_identity_verifiedt                        -64.309810  17.284109  -3.721
## availability_30                                  1.715346   0.769031   2.231
## availability_60                                 -0.356230   0.759588  -0.469
## availability_90                                  0.085484   0.393249   0.217
## review_scores_rating                            32.805207   7.791446   4.210
## reviews_per_month                               -4.661847   2.270830  -2.053
## has_availabilityt                              -15.597963  54.691575  -0.285
## availability_365                                -0.005370   0.033605  -0.160
## number_of_reviews                               -0.026516   0.048188  -0.550
##                                                            Pr(>|t|)    
## (Intercept)                                                0.404447    
## host_is_superhostt                                         0.029064 *  
## host_response_timewithin a day                             0.434153    
## host_response_timewithin a few hours                       0.710493    
## host_response_timewithin an hour                           0.574677    
## host_response_rate                                         0.733729    
## host_acceptance_rate                                       0.190306    
## accommodates                                            0.000001590 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                                0.000000906 ***
## neighbourhood_cleansedDowntown                             0.457100    
## neighbourhood_cleansedDowntown Eastside                    0.915879    
## neighbourhood_cleansedDunbar Southlands                    0.301991    
## neighbourhood_cleansedFairview                             0.882454    
## neighbourhood_cleansedGrandview-Woodland                   0.348957    
## neighbourhood_cleansedHastings-Sunrise                     0.056710 .  
## neighbourhood_cleansedKensington-Cedar Cottage             0.198789    
## neighbourhood_cleansedKerrisdale                           0.104123    
## neighbourhood_cleansedKillarney                            0.033966 *  
## neighbourhood_cleansedKitsilano                            0.383705    
## neighbourhood_cleansedMarpole                              0.047649 *  
## neighbourhood_cleansedMount Pleasant                       0.286541    
## neighbourhood_cleansedOakridge                             0.899759    
## neighbourhood_cleansedRenfrew-Collingwood                  0.086042 .  
## neighbourhood_cleansedRiley Park                           0.469061    
## neighbourhood_cleansedShaughnessy                          0.679988    
## neighbourhood_cleansedSouth Cambie                         0.509581    
## neighbourhood_cleansedStrathcona                           0.869055    
## neighbourhood_cleansedSunset                               0.094945 .  
## neighbourhood_cleansedVictoria-Fraserview                  0.007020 ** 
## neighbourhood_cleansedWest End                             0.529041    
## neighbourhood_cleansedWest Point Grey                      0.043550 *  
## host_listings_count                                        0.743827    
## minimum_nights                                             0.137557    
## maximum_nights                                             0.737538    
## instant_bookablet                                          0.244804    
## host_identity_verifiedt                                    0.000202 ***
## availability_30                                            0.025771 *  
## availability_60                                            0.639112    
## availability_90                                            0.827924    
## review_scores_rating                                    0.000026085 ***
## reviews_per_month                                          0.040149 *  
## has_availabilityt                                          0.775508    
## availability_365                                           0.873048    
## number_of_reviews                                          0.582172    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 35374.87)
## 
##     Null deviance: 171918871  on 3817  degrees of freedom
## Residual deviance: 133469400  on 3773  degrees of freedom
## AIC: 50871
## 
## Number of Fisher Scoring iterations: 2
# Perform stepwise selection on the GLM stored in `log_model`.
# NOTE(review): `log_model` is fitted with glm()'s default gaussian family, so
# this is not a logistic regression. No `scope`/`direction` is supplied, so
# step() runs its default backward elimination by AIC and prints the trace.
logstep_model1 <- step(log_model)
## Start:  AIC=50870.55
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability + availability_365 + 
##     number_of_reviews
## 
##                          Df  Deviance   AIC
## - host_response_time      3 133571339 50867
## - availability_365        1 133470304 50869
## - availability_90         1 133471072 50869
## - has_availability        1 133472278 50869
## - host_listings_count     1 133473179 50869
## - maximum_nights          1 133473374 50869
## - host_response_rate      1 133473494 50869
## - availability_60         1 133477181 50869
## - number_of_reviews       1 133480112 50869
## - instant_bookable        1 133517268 50870
## - host_acceptance_rate    1 133530099 50870
## <none>                      133469400 50871
## - minimum_nights          1 133547438 50871
## - reviews_per_month       1 133618488 50873
## - host_is_superhost       1 133638048 50873
## - availability_30         1 133645399 50874
## - host_identity_verified  1 133959129 50883
## - review_scores_rating    1 134096511 50886
## - accommodates            1 134286908 50892
## - bedrooms                1 134325413 50893
## - bathrooms               1 135878494 50937
## - neighbourhood_cleansed 22 137735622 50947
## 
## Step:  AIC=50867.47
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month + 
##     has_availability + availability_365 + number_of_reviews
## 
##                          Df  Deviance   AIC
## - availability_90         1 133573294 50866
## - maximum_nights          1 133573980 50866
## - has_availability        1 133574128 50866
## - availability_365        1 133574155 50866
## - host_listings_count     1 133577591 50866
## - availability_60         1 133579167 50866
## - number_of_reviews       1 133581934 50866
## - host_response_rate      1 133620592 50867
## - instant_bookable        1 133625033 50867
## - host_acceptance_rate    1 133628901 50867
## <none>                      133571339 50867
## - minimum_nights          1 133653270 50868
## - reviews_per_month       1 133738201 50870
## - availability_30         1 133742973 50870
## - host_is_superhost       1 133743863 50870
## - host_identity_verified  1 134053143 50879
## - review_scores_rating    1 134224542 50884
## - accommodates            1 134377074 50888
## - bedrooms                1 134432077 50890
## - bathrooms               1 135972049 50933
## - neighbourhood_cleansed 22 137816600 50943
## 
## Step:  AIC=50865.52
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + has_availability + 
##     availability_365 + number_of_reviews
## 
##                          Df  Deviance   AIC
## - availability_365        1 133575156 50864
## - maximum_nights          1 133575940 50864
## - has_availability        1 133576130 50864
## - host_listings_count     1 133580004 50864
## - availability_60         1 133583592 50864
## - number_of_reviews       1 133584164 50864
## - host_response_rate      1 133623189 50865
## - instant_bookable        1 133627198 50865
## - host_acceptance_rate    1 133632090 50865
## <none>                      133573294 50866
## - minimum_nights          1 133655707 50866
## - reviews_per_month       1 133739816 50868
## - host_is_superhost       1 133744308 50868
## - availability_30         1 133759340 50869
## - host_identity_verified  1 134054885 50877
## - review_scores_rating    1 134227040 50882
## - accommodates            1 134379388 50886
## - bedrooms                1 134432734 50888
## - bathrooms               1 135972401 50931
## - neighbourhood_cleansed 22 137816824 50941
## 
## Step:  AIC=50863.58
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + has_availability + 
##     number_of_reviews
## 
##                          Df  Deviance   AIC
## - maximum_nights          1 133577322 50862
## - has_availability        1 133577903 50862
## - host_listings_count     1 133581960 50862
## - number_of_reviews       1 133585759 50862
## - availability_60         1 133589656 50862
## - host_response_rate      1 133624655 50863
## - instant_bookable        1 133630107 50863
## - host_acceptance_rate    1 133633893 50863
## <none>                      133575156 50864
## - minimum_nights          1 133655790 50864
## - reviews_per_month       1 133740185 50866
## - host_is_superhost       1 133746269 50866
## - availability_30         1 133766211 50867
## - host_identity_verified  1 134057245 50875
## - review_scores_rating    1 134233350 50880
## - accommodates            1 134379651 50885
## - bedrooms                1 134437591 50886
## - bathrooms               1 135973181 50930
## - neighbourhood_cleansed 22 137818873 50939
## 
## Step:  AIC=50861.64
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + has_availability + 
##     number_of_reviews
## 
##                          Df  Deviance   AIC
## - has_availability        1 133579986 50860
## - host_listings_count     1 133584148 50860
## - number_of_reviews       1 133587109 50860
## - availability_60         1 133591581 50860
## - host_response_rate      1 133626730 50861
## - instant_bookable        1 133632663 50861
## - host_acceptance_rate    1 133636372 50861
## <none>                      133577322 50862
## - minimum_nights          1 133661509 50862
## - reviews_per_month       1 133747403 50864
## - host_is_superhost       1 133750407 50865
## - availability_30         1 133767212 50865
## - host_identity_verified  1 134058286 50873
## - review_scores_rating    1 134233477 50878
## - accommodates            1 134382635 50883
## - bedrooms                1 134442466 50884
## - bathrooms               1 135973748 50928
## - neighbourhood_cleansed 22 137838435 50938
## 
## Step:  AIC=50859.71
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     review_scores_rating + reviews_per_month + number_of_reviews
## 
##                          Df  Deviance   AIC
## - host_listings_count     1 133586809 50858
## - number_of_reviews       1 133589862 50858
## - availability_60         1 133594278 50858
## - host_response_rate      1 133629227 50859
## - instant_bookable        1 133636180 50859
## - host_acceptance_rate    1 133639353 50859
## <none>                      133579986 50860
## - minimum_nights          1 133663911 50860
## - reviews_per_month       1 133750961 50863
## - host_is_superhost       1 133754920 50863
## - availability_30         1 133770242 50863
## - host_identity_verified  1 134060436 50871
## - review_scores_rating    1 134234808 50876
## - accommodates            1 134387961 50881
## - bedrooms                1 134442641 50882
## - bathrooms               1 135982995 50926
## - neighbourhood_cleansed 22 137839934 50936
## 
## Step:  AIC=50857.91
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month + number_of_reviews
## 
##                          Df  Deviance   AIC
## - number_of_reviews       1 133597402 50856
## - availability_60         1 133600149 50856
## - host_response_rate      1 133634998 50857
## - instant_bookable        1 133640108 50857
## - host_acceptance_rate    1 133646522 50858
## <none>                      133586809 50858
## - minimum_nights          1 133672157 50858
## - host_is_superhost       1 133756256 50861
## - reviews_per_month       1 133761452 50861
## - availability_30         1 133774257 50861
## - host_identity_verified  1 134062798 50869
## - review_scores_rating    1 134236528 50874
## - accommodates            1 134391163 50879
## - bedrooms                1 134455228 50881
## - bathrooms               1 135987792 50924
## - neighbourhood_cleansed 22 137902499 50935
## 
## Step:  AIC=50856.21
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df  Deviance   AIC
## - availability_60         1 133609135 50855
## - host_response_rate      1 133645052 50856
## - instant_bookable        1 133646297 50856
## - host_acceptance_rate    1 133656508 50856
## <none>                      133597402 50856
## - minimum_nights          1 133682957 50857
## - host_is_superhost       1 133759606 50859
## - availability_30         1 133781028 50859
## - reviews_per_month       1 133856509 50862
## - host_identity_verified  1 134082463 50868
## - review_scores_rating    1 134253891 50873
## - accommodates            1 134399744 50877
## - bedrooms                1 134472293 50879
## - bathrooms               1 136008468 50923
## - neighbourhood_cleansed 22 137924054 50934
## 
## Step:  AIC=50854.55
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     minimum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df  Deviance   AIC
## - host_response_rate      1 133655440 50854
## - instant_bookable        1 133659906 50854
## - host_acceptance_rate    1 133664127 50854
## <none>                      133609135 50855
## - minimum_nights          1 133693885 50855
## - host_is_superhost       1 133774242 50857
## - reviews_per_month       1 133875012 50860
## - host_identity_verified  1 134098363 50867
## - availability_30         1 134135130 50868
## - review_scores_rating    1 134267861 50871
## - accommodates            1 134408179 50875
## - bedrooms                1 134486601 50878
## - bathrooms               1 136017601 50921
## - neighbourhood_cleansed 22 137931573 50932
## 
## Step:  AIC=50853.87
## price ~ host_is_superhost + host_acceptance_rate + accommodates + 
##     bathrooms + bedrooms + neighbourhood_cleansed + minimum_nights + 
##     instant_bookable + host_identity_verified + availability_30 + 
##     review_scores_rating + reviews_per_month
## 
##                          Df  Deviance   AIC
## - host_acceptance_rate    1 133686097 50853
## - instant_bookable        1 133705955 50853
## <none>                      133655440 50854
## - minimum_nights          1 133738786 50854
## - host_is_superhost       1 133814197 50856
## - reviews_per_month       1 133925694 50860
## - host_identity_verified  1 134151078 50866
## - availability_30         1 134200812 50867
## - review_scores_rating    1 134307304 50870
## - accommodates            1 134454315 50875
## - bedrooms                1 134531552 50877
## - bathrooms               1 136072762 50920
## - neighbourhood_cleansed 22 138011410 50932
## 
## Step:  AIC=50852.75
## price ~ host_is_superhost + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + minimum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + review_scores_rating + 
##     reviews_per_month
## 
##                          Df  Deviance   AIC
## - instant_bookable        1 133723993 50852
## <none>                      133686097 50853
## - minimum_nights          1 133759226 50853
## - host_is_superhost       1 133878636 50856
## - reviews_per_month       1 133931223 50858
## - host_identity_verified  1 134186504 50865
## - availability_30         1 134234688 50866
## - review_scores_rating    1 134337787 50869
## - accommodates            1 134514721 50874
## - bedrooms                1 134556734 50876
## - bathrooms               1 136085308 50919
## - neighbourhood_cleansed 22 138020508 50931
## 
## Step:  AIC=50851.83
## price ~ host_is_superhost + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + minimum_nights + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month
## 
##                          Df  Deviance   AIC
## <none>                      133723993 50852
## - minimum_nights          1 133801713 50852
## - host_is_superhost       1 133932502 50856
## - reviews_per_month       1 133993097 50858
## - host_identity_verified  1 134219758 50864
## - availability_30         1 134256600 50865
## - review_scores_rating    1 134405428 50869
## - accommodates            1 134537720 50873
## - bedrooms                1 134605798 50875
## - bathrooms               1 136121939 50918
## - neighbourhood_cleansed 22 138042976 50929
# Summary of the stepwise-selected GLM (`logstep_model1`).
# NOTE: despite the "log"/"logistic" naming, the model was fitted by glm()
# without a `family` argument, so it is a Gaussian (ordinary linear) regression
# of price -- the summary reports t-tests and a gaussian dispersion parameter.
summary(logstep_model1)
## 
## Call:
## glm(formula = price ~ host_is_superhost + accommodates + bathrooms + 
##     bedrooms + neighbourhood_cleansed + minimum_nights + host_identity_verified + 
##     availability_30 + review_scores_rating + reviews_per_month, 
##     data = air_clean)
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    -112.0849    49.0409  -2.286
## host_is_superhostt                               15.7748     6.4926   2.430
## accommodates                                     14.0895     2.9354   4.800
## bathrooms                                        62.0460     7.5302   8.240
## bedrooms                                         36.9237     7.3898   4.997
## neighbourhood_cleansedDowntown                   19.7935    26.8794   0.736
## neighbourhood_cleansedDowntown Eastside           4.1203    29.3261   0.140
## neighbourhood_cleansedDunbar Southlands         -33.0215    31.8826  -1.036
## neighbourhood_cleansedFairview                   -4.7947    33.0391  -0.145
## neighbourhood_cleansedGrandview-Woodland        -29.9364    30.6737  -0.976
## neighbourhood_cleansedHastings-Sunrise          -57.0590    30.4421  -1.874
## neighbourhood_cleansedKensington-Cedar Cottage  -38.7957    28.9638  -1.339
## neighbourhood_cleansedKerrisdale                -60.4145    37.4025  -1.615
## neighbourhood_cleansedKillarney                 -80.8849    37.4822  -2.158
## neighbourhood_cleansedKitsilano                  23.9052    28.3344   0.844
## neighbourhood_cleansedMarpole                   -65.3931    32.7020  -2.000
## neighbourhood_cleansedMount Pleasant             31.4575    29.0042   1.085
## neighbourhood_cleansedOakridge                   -5.1763    35.4228  -0.146
## neighbourhood_cleansedRenfrew-Collingwood       -52.4513    29.9687  -1.750
## neighbourhood_cleansedRiley Park                -20.5983    29.2187  -0.705
## neighbourhood_cleansedShaughnessy               -20.1101    35.3966  -0.568
## neighbourhood_cleansedSouth Cambie              -24.6347    37.1773  -0.663
## neighbourhood_cleansedStrathcona                 -8.0929    46.5123  -0.174
## neighbourhood_cleansedSunset                    -56.7506    33.5279  -1.693
## neighbourhood_cleansedVictoria-Fraserview       -92.5127    34.6655  -2.669
## neighbourhood_cleansedWest End                   18.6769    28.6604   0.652
## neighbourhood_cleansedWest Point Grey            70.9888    36.6270   1.938
## minimum_nights                                    0.2237     0.1508   1.483
## host_identity_verifiedt                         -64.4253    17.1962  -3.746
## availability_30                                   1.2442     0.3204   3.883
## review_scores_rating                             33.7967     7.6944   4.392
## reviews_per_month                                -5.4174     1.9627  -2.760
##                                                            Pr(>|t|)    
## (Intercept)                                                0.022336 *  
## host_is_superhostt                                         0.015159 *  
## accommodates                                   0.000001649396864246 ***
## bathrooms                                      0.000000000000000236 ***
## bedrooms                                       0.000000609972329033 ***
## neighbourhood_cleansedDowntown                             0.461544    
## neighbourhood_cleansedDowntown Eastside                    0.888273    
## neighbourhood_cleansedDunbar Southlands                    0.300399    
## neighbourhood_cleansedFairview                             0.884622    
## neighbourhood_cleansedGrandview-Woodland                   0.329146    
## neighbourhood_cleansedHastings-Sunrise                     0.060960 .  
## neighbourhood_cleansedKensington-Cedar Cottage             0.180503    
## neighbourhood_cleansedKerrisdale                           0.106339    
## neighbourhood_cleansedKillarney                            0.030994 *  
## neighbourhood_cleansedKitsilano                            0.398902    
## neighbourhood_cleansedMarpole                              0.045608 *  
## neighbourhood_cleansedMount Pleasant                       0.278176    
## neighbourhood_cleansedOakridge                             0.883828    
## neighbourhood_cleansedRenfrew-Collingwood                  0.080164 .  
## neighbourhood_cleansedRiley Park                           0.480872    
## neighbourhood_cleansedShaughnessy                          0.569976    
## neighbourhood_cleansedSouth Cambie                         0.507610    
## neighbourhood_cleansedStrathcona                           0.861878    
## neighbourhood_cleansedSunset                               0.090606 .  
## neighbourhood_cleansedVictoria-Fraserview                  0.007646 ** 
## neighbourhood_cleansedWest End                             0.514659    
## neighbourhood_cleansedWest Point Grey                      0.052678 .  
## minimum_nights                                             0.138057    
## host_identity_verifiedt                                    0.000182 ***
## availability_30                                            0.000105 ***
## review_scores_rating                           0.000011519513171064 ***
## reviews_per_month                                          0.005804 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 35320.65)
## 
##     Null deviance: 171918871  on 3817  degrees of freedom
## Residual deviance: 133723993  on 3786  degrees of freedom
## AIC: 50852
## 
## Number of Fisher Scoring iterations: 2
# Exploratory plots of the data behind `log_model`.
#
# The stored model frame (`log_model$model`) is handed to ggplot() as the data
# argument, so aes() and facet_wrap() can refer to columns by bare name.
# Writing `log_model$model$price` inside aes() bypasses ggplot2's data masking
# (it is explicitly discouraged by ggplot2), and passing the glm object itself
# depends on the implicit fortify() method for model objects.

# Price vs. accommodates, one panel per bedroom count.
p <- ggplot(log_model$model, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(
  p + labs(
    title = "Price compared to accommodation size split by bedrooms (Log Model 1)",
    y = "Price",
    x = "Accommodates"
  )
)

# Price vs. accommodates, one panel per bathroom count; a boxplot per
# accommodates value is drawn on top of the raw points.
p <- ggplot(log_model$model, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms) +
  geom_boxplot(aes(group = accommodates))
print(
  p + labs(
    title = "Price compared to accommodation size split by bathrooms (Log Model 1)",
    y = "Price",
    x = "Accommodates"
  )
)

# Distribution of price for each bathroom count.
p <- ggplot(log_model$model, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms))
print(
  p + labs(
    title = "Price compared to bathrooms (Log Model 1)",
    y = "Price",
    x = "Bathrooms"
  )
)

# Price vs. number of reviews.
p <- ggplot(log_model$model, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(
  p + labs(
    title = "Price compared to the number of reviews (Log Model 1)",
    y = "Price",
    x = "Number of reviews"
  )
)

Logistic model (in practice a Gaussian GLM, because `glm()` is called without a `family` argument) fitted on the same data as Model 4

library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Fit a GLM of price on every other column of `air_clean_filtered`.
# NOTE: no `family` is supplied, so glm() uses its default gaussian family;
# this is an ordinary linear regression on price, not a logistic regression
# (the summary reports "Dispersion parameter for gaussian family").
# NOTE(review): the previous comment mentioned "significant neighbourhoods";
# the fit still estimates 22 neighbourhood_cleansed dummies -- confirm how
# `air_clean_filtered` was built.
log_model2 <- glm(price ~ ., data = air_clean_filtered)
summary(log_model2)
## 
## Call:
## glm(formula = price ~ ., data = air_clean_filtered)
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                    -35.354771  29.905671  -1.182
## host_is_superhostt                              14.320777   2.509036   5.708
## host_response_timewithin a day                 -22.579428  19.995827  -1.129
## host_response_timewithin a few hours           -20.581804  21.175437  -0.972
## host_response_timewithin an hour               -19.771795  21.398596  -0.924
## host_response_rate                             -20.569007  21.858076  -0.941
## host_acceptance_rate                            15.377913   8.166207   1.883
## accommodates                                    10.729022   1.109471   9.670
## bathrooms                                       30.678979   2.889193  10.619
## bedrooms                                        37.140235   2.800603  13.262
## neighbourhood_cleansedDowntown                  39.845624  10.207125   3.904
## neighbourhood_cleansedDowntown Eastside         25.293042  11.088682   2.281
## neighbourhood_cleansedDunbar Southlands        -13.617895  12.062133  -1.129
## neighbourhood_cleansedFairview                  23.965913  12.439301   1.927
## neighbourhood_cleansedGrandview-Woodland        -2.465421  11.591317  -0.213
## neighbourhood_cleansedHastings-Sunrise         -31.576074  11.482826  -2.750
## neighbourhood_cleansedKensington-Cedar Cottage -19.747292  10.985093  -1.798
## neighbourhood_cleansedKerrisdale               -47.166174  14.157352  -3.332
## neighbourhood_cleansedKillarney                -51.747185  14.060599  -3.680
## neighbourhood_cleansedKitsilano                 41.462154  10.746738   3.858
## neighbourhood_cleansedMarpole                  -36.700292  12.301660  -2.983
## neighbourhood_cleansedMount Pleasant            17.959586  10.971467   1.637
## neighbourhood_cleansedOakridge                 -32.875063  13.534723  -2.429
## neighbourhood_cleansedRenfrew-Collingwood      -31.193802  11.338279  -2.751
## neighbourhood_cleansedRiley Park                -4.467922  11.073163  -0.403
## neighbourhood_cleansedShaughnessy              -14.477791  13.493965  -1.073
## neighbourhood_cleansedSouth Cambie              -2.689217  13.939127  -0.193
## neighbourhood_cleansedStrathcona                19.186889  17.353452   1.106
## neighbourhood_cleansedSunset                   -32.634434  12.651370  -2.580
## neighbourhood_cleansedVictoria-Fraserview      -59.517197  13.035596  -4.566
## neighbourhood_cleansedWest End                  38.655672  10.875146   3.554
## neighbourhood_cleansedWest Point Grey           18.198157  14.135005   1.287
## host_listings_count                              0.116980   0.037061   3.156
## minimum_nights                                  -0.645555   0.059340 -10.879
## maximum_nights                                  -0.002560   0.002487  -1.029
## instant_bookablet                               -6.340251   2.814059  -2.253
## host_identity_verifiedt                         -3.155202   6.490253  -0.486
## availability_30                                  1.162535   0.285523   4.072
## availability_60                                 -0.407248   0.280735  -1.451
## availability_90                                  0.084992   0.145345   0.585
## review_scores_rating                            24.291880   2.879319   8.437
## reviews_per_month                               -4.186840   0.841474  -4.976
## has_availabilityt                              -23.901324  20.165539  -1.185
## availability_365                                 0.037347   0.012469   2.995
## number_of_reviews                               -0.043240   0.017794  -2.430
##                                                            Pr(>|t|)    
## (Intercept)                                                0.237198    
## host_is_superhostt                                     0.0000000123 ***
## host_response_timewithin a day                             0.258883    
## host_response_timewithin a few hours                       0.331131    
## host_response_timewithin an hour                           0.355559    
## host_response_rate                                         0.346753    
## host_acceptance_rate                                       0.059763 .  
## accommodates                                   < 0.0000000000000002 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                       < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown                         0.0000964083 ***
## neighbourhood_cleansedDowntown Eastside                    0.022606 *  
## neighbourhood_cleansedDunbar Southlands                    0.258979    
## neighbourhood_cleansedFairview                             0.054102 .  
## neighbourhood_cleansedGrandview-Woodland                   0.831576    
## neighbourhood_cleansedHastings-Sunrise                     0.005991 ** 
## neighbourhood_cleansedKensington-Cedar Cottage             0.072314 .  
## neighbourhood_cleansedKerrisdale                           0.000872 ***
## neighbourhood_cleansedKillarney                            0.000236 ***
## neighbourhood_cleansedKitsilano                            0.000116 ***
## neighbourhood_cleansedMarpole                              0.002870 ** 
## neighbourhood_cleansedMount Pleasant                       0.101728    
## neighbourhood_cleansedOakridge                             0.015190 *  
## neighbourhood_cleansedRenfrew-Collingwood                  0.005967 ** 
## neighbourhood_cleansedRiley Park                           0.686610    
## neighbourhood_cleansedShaughnessy                          0.283382    
## neighbourhood_cleansedSouth Cambie                         0.847028    
## neighbourhood_cleansedStrathcona                           0.268948    
## neighbourhood_cleansedSunset                               0.009932 ** 
## neighbourhood_cleansedVictoria-Fraserview              0.0000051382 ***
## neighbourhood_cleansedWest End                             0.000383 ***
## neighbourhood_cleansedWest Point Grey                      0.198016    
## host_listings_count                                        0.001610 ** 
## minimum_nights                                 < 0.0000000000000002 ***
## maximum_nights                                             0.303335    
## instant_bookablet                                          0.024313 *  
## host_identity_verifiedt                                    0.626893    
## availability_30                                        0.0000476631 ***
## availability_60                                            0.146962    
## availability_90                                            0.558746    
## review_scores_rating                           < 0.0000000000000002 ***
## reviews_per_month                                      0.0000006799 ***
## has_availabilityt                                          0.235992    
## availability_365                                           0.002761 ** 
## number_of_reviews                                          0.015144 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 4808.166)
## 
##     Null deviance: 37880796  on 3766  degrees of freedom
## Residual deviance: 17895994  on 3722  degrees of freedom
## AIC: 42674
## 
## Number of Fisher Scoring iterations: 2
# Stepwise selection by AIC, starting from the full model `log_model2`.
# No `scope` or `direction` is passed, so step() only considers dropping
# terms (backward elimination) -- every candidate in the trace is a "-" move.
# The underlying model is a Gaussian GLM, not a logistic regression.
logstep_model2 <- step(log_model2)
## Start:  AIC=42673.91
## price ~ host_is_superhost + host_response_time + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     maximum_nights + instant_bookable + host_identity_verified + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability + availability_365 + 
##     number_of_reviews
## 
##                          Df Deviance   AIC
## - host_response_time      3 17904216 42670
## - host_identity_verified  1 17897130 42672
## - availability_90         1 17897638 42672
## - host_response_rate      1 17900252 42673
## - maximum_nights          1 17901089 42673
## - has_availability        1 17902749 42673
## <none>                      17895994 42674
## - availability_60         1 17906112 42674
## - host_acceptance_rate    1 17913044 42675
## - instant_bookable        1 17920402 42677
## - number_of_reviews       1 17924387 42678
## - availability_365        1 17939129 42681
## - host_listings_count     1 17943898 42682
## - availability_30         1 17975703 42689
## - reviews_per_month       1 18015028 42697
## - host_is_superhost       1 18052632 42705
## - review_scores_rating    1 18238227 42743
## - accommodates            1 18345637 42765
## - bathrooms               1 18438130 42784
## - minimum_nights          1 18465041 42790
## - bedrooms                1 18741595 42846
## - neighbourhood_cleansed 22 21182809 43265
## 
## Step:  AIC=42669.64
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     host_identity_verified + availability_30 + availability_60 + 
##     availability_90 + review_scores_rating + reviews_per_month + 
##     has_availability + availability_365 + number_of_reviews
## 
##                          Df Deviance   AIC
## - host_identity_verified  1 17905433 42668
## - availability_90         1 17906086 42668
## - maximum_nights          1 17909374 42669
## - has_availability        1 17911190 42669
## <none>                      17904216 42670
## - availability_60         1 17914440 42670
## - host_acceptance_rate    1 17925400 42672
## - instant_bookable        1 17926932 42672
## - number_of_reviews       1 17933049 42674
## - availability_365        1 17944967 42676
## - host_response_rate      1 17946279 42676
## - host_listings_count     1 17953052 42678
## - availability_30         1 17982220 42684
## - reviews_per_month       1 18022103 42692
## - host_is_superhost       1 18065616 42701
## - review_scores_rating    1 18252579 42740
## - accommodates            1 18356689 42762
## - bathrooms               1 18444964 42780
## - minimum_nights          1 18480067 42787
## - bedrooms                1 18749843 42841
## - neighbourhood_cleansed 22 21184640 43259
## 
## Step:  AIC=42667.89
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     availability_30 + availability_60 + availability_90 + review_scores_rating + 
##     reviews_per_month + has_availability + availability_365 + 
##     number_of_reviews
## 
##                          Df Deviance   AIC
## - availability_90         1 17907285 42666
## - maximum_nights          1 17910698 42667
## - has_availability        1 17912362 42667
## <none>                      17905433 42668
## - availability_60         1 17915726 42668
## - host_acceptance_rate    1 17926842 42670
## - instant_bookable        1 17928032 42671
## - number_of_reviews       1 17934989 42672
## - availability_365        1 17946216 42674
## - host_response_rate      1 17947810 42675
## - host_listings_count     1 17953646 42676
## - availability_30         1 17983819 42682
## - reviews_per_month       1 18023110 42691
## - host_is_superhost       1 18065655 42699
## - review_scores_rating    1 18254467 42739
## - accommodates            1 18357774 42760
## - bathrooms               1 18446500 42778
## - minimum_nights          1 18480963 42785
## - bedrooms                1 18750862 42840
## - neighbourhood_cleansed 22 21185347 43258
## 
## Step:  AIC=42666.28
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + maximum_nights + instant_bookable + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month + has_availability + availability_365 + 
##     number_of_reviews
## 
##                          Df Deviance   AIC
## - maximum_nights          1 17912531 42665
## - has_availability        1 17914286 42666
## <none>                      17907285 42666
## - availability_60         1 17924060 42668
## - host_acceptance_rate    1 17929423 42669
## - instant_bookable        1 17930013 42669
## - number_of_reviews       1 17937286 42671
## - host_response_rate      1 17950193 42673
## - availability_365        1 17955830 42674
## - host_listings_count     1 17956824 42675
## - availability_30         1 17988953 42681
## - reviews_per_month       1 18024713 42689
## - host_is_superhost       1 18066061 42698
## - review_scores_rating    1 18256676 42737
## - accommodates            1 18359880 42758
## - bathrooms               1 18447599 42776
## - minimum_nights          1 18481995 42783
## - bedrooms                1 18751476 42838
## - neighbourhood_cleansed 22 21185408 43256
## 
## Step:  AIC=42665.38
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month + has_availability + availability_365 + 
##     number_of_reviews
## 
##                          Df Deviance   AIC
## - has_availability        1 17919772 42665
## <none>                      17912531 42665
## - availability_60         1 17929040 42667
## - host_acceptance_rate    1 17934381 42668
## - instant_bookable        1 17934786 42668
## - number_of_reviews       1 17945472 42670
## - host_response_rate      1 17955627 42672
## - availability_365        1 17957932 42673
## - host_listings_count     1 17961881 42674
## - availability_30         1 17995236 42681
## - reviews_per_month       1 18026571 42687
## - host_is_superhost       1 18069058 42696
## - review_scores_rating    1 18265553 42737
## - accommodates            1 18364841 42757
## - bathrooms               1 18454432 42776
## - minimum_nights          1 18500055 42785
## - bedrooms                1 18753719 42836
## - neighbourhood_cleansed 22 21186606 43254
## 
## Step:  AIC=42664.91
## price ~ host_is_superhost + host_response_rate + host_acceptance_rate + 
##     accommodates + bathrooms + bedrooms + neighbourhood_cleansed + 
##     host_listings_count + minimum_nights + instant_bookable + 
##     availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month + availability_365 + number_of_reviews
## 
##                          Df Deviance   AIC
## <none>                      17919772 42665
## - availability_60         1 17936455 42666
## - host_acceptance_rate    1 17941926 42668
## - instant_bookable        1 17942920 42668
## - number_of_reviews       1 17952956 42670
## - host_response_rate      1 17962586 42672
## - availability_365        1 17965835 42673
## - host_listings_count     1 17969121 42673
## - availability_30         1 18002958 42680
## - reviews_per_month       1 18034916 42687
## - host_is_superhost       1 18079095 42696
## - review_scores_rating    1 18271090 42736
## - accommodates            1 18375007 42757
## - bathrooms               1 18465918 42776
## - minimum_nights          1 18508838 42785
## - bedrooms                1 18755936 42835
## - neighbourhood_cleansed 22 21192590 43253
# Summary of the stepwise-selected GLM (`logstep_model2`) fitted on
# `air_clean_filtered`. Despite the "log" prefix this is a Gaussian GLM
# (glm() was called without `family`), i.e. a linear regression of price.
summary(logstep_model2)
## 
## Call:
## glm(formula = price ~ host_is_superhost + host_response_rate + 
##     host_acceptance_rate + accommodates + bathrooms + bedrooms + 
##     neighbourhood_cleansed + host_listings_count + minimum_nights + 
##     instant_bookable + availability_30 + availability_60 + review_scores_rating + 
##     reviews_per_month + availability_365 + number_of_reviews, 
##     data = air_clean_filtered)
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    -69.91018   20.69287  -3.378
## host_is_superhostt                              14.33211    2.48909   5.758
## host_response_rate                             -36.43895   12.20789  -2.985
## host_acceptance_rate                            16.86872    7.85642   2.147
## accommodates                                    10.78229    1.10781   9.733
## bathrooms                                       30.76770    2.88610  10.661
## bedrooms                                        36.87459    2.79545  13.191
## neighbourhood_cleansedDowntown                  39.74247   10.20097   3.896
## neighbourhood_cleansedDowntown Eastside         25.41006   11.08268   2.293
## neighbourhood_cleansedDunbar Southlands        -12.75132   12.04567  -1.059
## neighbourhood_cleansedFairview                  23.47804   12.42448   1.890
## neighbourhood_cleansedGrandview-Woodland        -1.79853   11.58297  -0.155
## neighbourhood_cleansedHastings-Sunrise         -31.50081   11.47586  -2.745
## neighbourhood_cleansedKensington-Cedar Cottage -19.38871   10.97832  -1.766
## neighbourhood_cleansedKerrisdale               -47.40085   14.15218  -3.349
## neighbourhood_cleansedKillarney                -51.28220   14.05060  -3.650
## neighbourhood_cleansedKitsilano                 41.30882   10.74097   3.846
## neighbourhood_cleansedMarpole                  -37.11291   12.29200  -3.019
## neighbourhood_cleansedMount Pleasant            18.08668   10.96627   1.649
## neighbourhood_cleansedOakridge                 -33.24005   13.50823  -2.461
## neighbourhood_cleansedRenfrew-Collingwood      -30.79953   11.32928  -2.719
## neighbourhood_cleansedRiley Park                -4.25545   11.06709  -0.385
## neighbourhood_cleansedShaughnessy              -13.68428   13.45162  -1.017
## neighbourhood_cleansedSouth Cambie              -2.41965   13.92502  -0.174
## neighbourhood_cleansedStrathcona                18.99607   17.33737   1.096
## neighbourhood_cleansedSunset                   -32.22891   12.64232  -2.549
## neighbourhood_cleansedVictoria-Fraserview      -58.82026   13.02703  -4.515
## neighbourhood_cleansedWest End                  38.35129   10.86815   3.529
## neighbourhood_cleansedWest Point Grey           18.39524   14.12524   1.302
## host_listings_count                              0.11816    0.03687   3.205
## minimum_nights                                  -0.65345    0.05902 -11.072
## instant_bookablet                               -6.12523    2.79082  -2.195
## availability_30                                  1.09260    0.26261   4.161
## availability_60                                 -0.25855    0.13876  -1.863
## review_scores_rating                            24.48425    2.86356   8.550
## reviews_per_month                               -4.05327    0.82805  -4.895
## availability_365                                 0.03690    0.01192   3.096
## number_of_reviews                               -0.04641    0.01766  -2.628
##                                                            Pr(>|t|)    
## (Intercept)                                                0.000736 ***
## host_is_superhostt                                     0.0000000092 ***
## host_response_rate                                         0.002855 ** 
## host_acceptance_rate                                       0.031848 *  
## accommodates                                   < 0.0000000000000002 ***
## bathrooms                                      < 0.0000000000000002 ***
## bedrooms                                       < 0.0000000000000002 ***
## neighbourhood_cleansedDowntown                         0.0000995303 ***
## neighbourhood_cleansedDowntown Eastside                    0.021916 *  
## neighbourhood_cleansedDunbar Southlands                    0.289859    
## neighbourhood_cleansedFairview                             0.058881 .  
## neighbourhood_cleansedGrandview-Woodland                   0.876614    
## neighbourhood_cleansedHastings-Sunrise                     0.006081 ** 
## neighbourhood_cleansedKensington-Cedar Cottage             0.077462 .  
## neighbourhood_cleansedKerrisdale                           0.000818 ***
## neighbourhood_cleansedKillarney                            0.000266 ***
## neighbourhood_cleansedKitsilano                            0.000122 ***
## neighbourhood_cleansedMarpole                              0.002551 ** 
## neighbourhood_cleansedMount Pleasant                       0.099170 .  
## neighbourhood_cleansedOakridge                             0.013911 *  
## neighbourhood_cleansedRenfrew-Collingwood                  0.006587 ** 
## neighbourhood_cleansedRiley Park                           0.700619    
## neighbourhood_cleansedShaughnessy                          0.309079    
## neighbourhood_cleansedSouth Cambie                         0.862061    
## neighbourhood_cleansedStrathcona                           0.273293    
## neighbourhood_cleansedSunset                               0.010834 *  
## neighbourhood_cleansedVictoria-Fraserview              0.0000065198 ***
## neighbourhood_cleansedWest End                             0.000423 ***
## neighbourhood_cleansedWest Point Grey                      0.192896    
## host_listings_count                                        0.001364 ** 
## minimum_nights                                 < 0.0000000000000002 ***
## instant_bookablet                                          0.028241 *  
## availability_30                                        0.0000324573 ***
## availability_60                                            0.062509 .  
## review_scores_rating                           < 0.0000000000000002 ***
## reviews_per_month                                      0.0000010248 ***
## availability_365                                           0.001976 ** 
## number_of_reviews                                          0.008629 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 4805.517)
## 
##     Null deviance: 37880796  on 3766  degrees of freedom
## Residual deviance: 17919772  on 3729  degrees of freedom
## AIC: 42665
## 
## Number of Fisher Scoring iterations: 2
# Gather the two stepwise-selected GLM fits so they can be tabulated together
logmodels_list <- list(logstep_model1, logstep_model2)

# Plain-text comparison table; single.row = TRUE keeps each coefficient and
# its standard error on one line
stargazer(
  logmodels_list,
  type = "text",
  title = "Model Comparison",
  column.labels = paste("Log Model", 1:2),
  header = FALSE,
  single.row = TRUE,
  font.size = "small"
)
## 
## Model Comparison
## ======================================================================================
##                                                          Dependent variable:          
##                                                ---------------------------------------
##                                                                 price                 
##                                                    Log Model 1         Log Model 2    
##                                                        (1)                 (2)        
## --------------------------------------------------------------------------------------
## host_is_superhostt                              15.775** (6.493)    14.332*** (2.489) 
## host_response_rate                                                 -36.439*** (12.208)
## host_acceptance_rate                                                16.869** (7.856)  
## accommodates                                    14.090*** (2.935)   10.782*** (1.108) 
## bathrooms                                       62.046*** (7.530)   30.768*** (2.886) 
## bedrooms                                        36.924*** (7.390)   36.875*** (2.795) 
## neighbourhood_cleansedDowntown                   19.793 (26.879)   39.742*** (10.201) 
## neighbourhood_cleansedDowntown Eastside          4.120 (29.326)     25.410** (11.083) 
## neighbourhood_cleansedDunbar Southlands         -33.021 (31.883)    -12.751 (12.046)  
## neighbourhood_cleansedFairview                   -4.795 (33.039)    23.478* (12.424)  
## neighbourhood_cleansedGrandview-Woodland        -29.936 (30.674)     -1.799 (11.583)  
## neighbourhood_cleansedHastings-Sunrise          -57.059* (30.442)  -31.501*** (11.476)
## neighbourhood_cleansedKensington-Cedar Cottage  -38.796 (28.964)    -19.389* (10.978) 
## neighbourhood_cleansedKerrisdale                -60.415 (37.403)   -47.401*** (14.152)
## neighbourhood_cleansedKillarney                -80.885** (37.482)  -51.282*** (14.051)
## neighbourhood_cleansedKitsilano                  23.905 (28.334)   41.309*** (10.741) 
## neighbourhood_cleansedMarpole                  -65.393** (32.702)  -37.113*** (12.292)
## neighbourhood_cleansedMount Pleasant             31.457 (29.004)    18.087* (10.966)  
## neighbourhood_cleansedOakridge                   -5.176 (35.423)   -33.240** (13.508) 
## neighbourhood_cleansedRenfrew-Collingwood       -52.451* (29.969)  -30.800*** (11.329)
## neighbourhood_cleansedRiley Park                -20.598 (29.219)     -4.255 (11.067)  
## neighbourhood_cleansedShaughnessy               -20.110 (35.397)    -13.684 (13.452)  
## neighbourhood_cleansedSouth Cambie              -24.635 (37.177)     -2.420 (13.925)  
## neighbourhood_cleansedStrathcona                 -8.093 (46.512)     18.996 (17.337)  
## neighbourhood_cleansedSunset                    -56.751* (33.528)  -32.229** (12.642) 
## neighbourhood_cleansedVictoria-Fraserview      -92.513*** (34.665) -58.820*** (13.027)
## neighbourhood_cleansedWest End                   18.677 (28.660)   38.351*** (10.868) 
## neighbourhood_cleansedWest Point Grey           70.989* (36.627)     18.395 (14.125)  
## host_listings_count                                                 0.118*** (0.037)  
## minimum_nights                                    0.224 (0.151)     -0.653*** (0.059) 
## host_identity_verifiedt                        -64.425*** (17.196)                    
## instant_bookablet                                                   -6.125** (2.791)  
## availability_30                                 1.244*** (0.320)    1.093*** (0.263)  
## availability_60                                                      -0.259* (0.139)  
## review_scores_rating                            33.797*** (7.694)   24.484*** (2.864) 
## reviews_per_month                               -5.417*** (1.963)   -4.053*** (0.828) 
## availability_365                                                    0.037*** (0.012)  
## number_of_reviews                                                   -0.046*** (0.018) 
## Constant                                       -112.085** (49.041) -69.910*** (20.693)
## --------------------------------------------------------------------------------------
## Observations                                          3,818               3,767       
## Log Likelihood                                     -25,393.910         -21,294.450    
## Akaike Inf. Crit.                                  50,851.830          42,664.910     
## ======================================================================================
## Note:                                                      *p<0.1; **p<0.05; ***p<0.01
# Exploratory plots for Log Model 2.
#
# The plots are drawn from the model frame stored on the fitted object
# (`log_model2$model`), i.e. the rows and columns the model was fitted on.
# Columns are referenced by bare name inside aes() and facet_wrap() rather
# than through `log_model2$model$...`: `$` inside aes() bypasses the `data`
# argument, which ggplot2 discourages, and passing the model frame directly
# avoids relying on ggplot2 converting a fitted model into a data frame.
log_model2_frame <- log_model2$model

# Price against accommodates, one panel per bedroom count
p <- ggplot(log_model2_frame, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bedrooms)
print(p + labs(title = "Price compared to accommodation size split by bedrooms (Log Model 2)", y = "Price", x = "Accommodates"))

# Price against accommodates, one panel per bathroom count, with a boxplot
# per accommodates value drawn over the points
p <- ggplot(log_model2_frame, aes(x = accommodates, y = price)) +
  geom_point() +
  facet_wrap(~bathrooms) +
  geom_boxplot(aes(group = accommodates))
print(p + labs(title = "Price compared to accommodation size split by bathrooms (Log Model 2)", y = "Price", x = "Accommodates"))

# Price distribution per bathroom count, with a smoothed trend on top
p <- ggplot(log_model2_frame, aes(x = bathrooms, y = price)) +
  geom_boxplot(aes(group = bathrooms)) +
  geom_smooth()
print(p + labs(title = "Price compared to bathrooms (Log Model 2)", y = "Price", x = "Bathrooms"))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Price against the total number of reviews
p <- ggplot(log_model2_frame, aes(x = number_of_reviews, y = price)) +
  geom_point()
print(p + labs(title = "Price compared to the number of reviews (Log Model 2)", y = "Price", x = "Number of reviews"))

Comparison of Models

# stargazer renders the side-by-side regression table below
library(stargazer)

# The three stepwise-selected fits to compare
models_list <- list(step_model, step_model2, step_model3)

# Plain-text comparison table; single.row = TRUE keeps each coefficient and
# its standard error on one line
stargazer(
  models_list,
  type = "text",
  title = "Model Comparison",
  column.labels = paste("Model", 1:3),
  header = FALSE,
  single.row = TRUE,
  font.size = "small"
)
## 
## Model Comparison
## =============================================================================================================================
##                                                                             Dependent variable:                              
##                                                ------------------------------------------------------------------------------
##                                                                                    price                                     
##                                                         Model 1                   Model 2                   Model 3          
##                                                           (1)                       (2)                       (3)            
## -----------------------------------------------------------------------------------------------------------------------------
## host_is_superhostt                                 15.775** (6.493)           15.802* (8.254)          13.853*** (2.483)     
## host_response_rate                                                                                    -36.381*** (12.233)    
## host_acceptance_rate                                                                                    16.201** (7.881)     
## accommodates                                       14.090*** (2.935)         13.486*** (3.702)         10.949*** (1.108)     
## bathrooms                                          62.046*** (7.530)        67.610*** (10.031)         31.087*** (2.890)     
## bedrooms                                           36.924*** (7.390)         30.971*** (9.488)         36.986*** (2.799)     
## neighbourhood_cleansedDowntown                      19.793 (26.879)                                    40.471*** (10.215)    
## neighbourhood_cleansedDowntown Eastside             4.120 (29.326)           -17.499 (15.841)          26.150** (11.101)     
## neighbourhood_cleansedDunbar Southlands            -33.021 (31.883)                                     -11.687 (12.065)     
## neighbourhood_cleansedFairview                      -4.795 (33.039)                                     23.800* (12.447)     
## neighbourhood_cleansedGrandview-Woodland           -29.936 (30.674)                                     -2.725 (11.599)      
## neighbourhood_cleansedHastings-Sunrise             -57.059* (30.442)        -73.211*** (18.515)       -31.742*** (11.497)    
## neighbourhood_cleansedKensington-Cedar Cottage     -38.796 (28.964)                                    -18.534* (10.996)     
## neighbourhood_cleansedKerrisdale                   -60.415 (37.403)         -74.843** (30.507)        -48.284*** (14.172)    
## neighbourhood_cleansedKillarney                   -80.885** (37.482)        -92.231*** (30.614)       -49.232*** (14.067)    
## neighbourhood_cleansedKitsilano                     23.905 (28.334)           9.953 (13.803)           42.071*** (10.760)    
## neighbourhood_cleansedMarpole                     -65.393** (32.702)        -81.246*** (22.617)       -36.482*** (12.312)    
## neighbourhood_cleansedMount Pleasant                31.457 (29.004)           15.946 (15.170)           18.553* (10.983)     
## neighbourhood_cleansedOakridge                      -5.176 (35.423)          -14.613 (27.603)          -33.292** (13.529)    
## neighbourhood_cleansedRenfrew-Collingwood          -52.451* (29.969)        -66.994*** (17.539)       -31.234*** (11.348)    
## neighbourhood_cleansedRiley Park                   -20.598 (29.219)                                     -3.551 (11.085)      
## neighbourhood_cleansedShaughnessy                  -20.110 (35.397)                                     -13.967 (13.471)     
## neighbourhood_cleansedSouth Cambie                 -24.635 (37.177)                                     -2.499 (13.949)      
## neighbourhood_cleansedStrathcona                    -8.093 (46.512)                                     19.798 (17.362)      
## neighbourhood_cleansedSunset                       -56.751* (33.528)        -70.423*** (24.190)        -32.036** (12.662)    
## neighbourhood_cleansedVictoria-Fraserview         -92.513*** (34.665)      -107.547*** (26.302)       -58.745*** (13.042)    
## neighbourhood_cleansedWest End                      18.677 (28.660)           -0.273 (14.134)          38.205*** (10.883)    
## neighbourhood_cleansedWest Point Grey              70.989* (36.627)                                     18.030 (14.151)      
## host_listings_count                                                                                     0.117*** (0.037)     
## minimum_nights                                       0.224 (0.151)           0.587*** (0.170)          -0.632*** (0.059)     
## instant_bookablet                                                             -13.185 (9.303)           -4.976* (2.778)      
## host_identity_verifiedt                           -64.425*** (17.196)       -64.441*** (23.422)                              
## availability_30                                    1.244*** (0.320)          1.474*** (0.419)           1.155*** (0.285)     
## availability_60                                                                                         -0.472* (0.280)      
## availability_90                                                                                          0.210 (0.140)       
## review_scores_rating                               33.797*** (7.694)         33.159*** (9.411)         24.363*** (2.863)     
## reviews_per_month                                  -5.417*** (1.963)                                   -5.131*** (0.752)     
## Constant                                          -112.085** (49.041)       -102.057** (51.708)       -69.213*** (20.637)    
## -----------------------------------------------------------------------------------------------------------------------------
## Observations                                             3,818                     2,803                     3,767           
## R2                                                       0.222                     0.172                     0.525           
## Adjusted R2                                              0.216                     0.166                     0.521           
## Residual Std. Error                               187.938 (df = 3786)       207.711 (df = 2781)        69.448 (df = 3730)    
## F Statistic                                    34.883*** (df = 31; 3786) 27.466*** (df = 21; 2781) 114.561*** (df = 36; 3730)
## =============================================================================================================================
## Note:                                                                                             *p<0.1; **p<0.05; ***p<0.01
# Bias number printing towards fixed notation instead of scientific notation
options(scipen = 999)


# Diagnostic plot: Residuals vs Fitted for Model 3.
# The fitted values and residuals are collected into their own data frame so
# the plot does not depend on `air_clean_filtered` having exactly one row per
# fitted observation (ggplot rejects aesthetics whose length differs from the
# number of rows in the plot data).
model3_diagnostics <- data.frame(
  fitted_value = fitted(step_model3),
  residual = residuals(step_model3)
)

residuals_vs_fitted <- ggplot(model3_diagnostics, aes(x = fitted_value, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  xlab("Fitted Values") +
  ylab("Residuals") +
  ggtitle("Residuals vs Fitted Model 3")

# Show the plot
print(residuals_vs_fitted)

# Get the residuals from the model.
# NOTE(review): this variable shares its name with the residuals() function;
# calls to residuals(...) still resolve to the function, but a distinct name
# would be clearer. The name is kept in case other code reads it.
residuals <- residuals(step_model3)

# Create a histogram of the residuals
hist(residuals, breaks = 30, main = "Histogram of Residuals OLS Model 3", xlab = "Residuals")

# Diagnostic plot: Residuals vs Fitted for Model 1.
# The fitted values and residuals are collected into their own data frame so
# the plot does not depend on `air_clean` having exactly one row per fitted
# observation (ggplot rejects aesthetics whose length differs from the number
# of rows in the plot data).
model1_diagnostics <- data.frame(
  fitted_value = fitted(step_model),
  residual = residuals(step_model)
)

residuals_vs_fitted2 <- ggplot(model1_diagnostics, aes(x = fitted_value, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  xlab("Fitted Values") +
  ylab("Residuals") +
  ggtitle("Residuals vs Fitted Model 1")

# Show the plot
print(residuals_vs_fitted2)

# Get the residuals from the model
residuals2 <- residuals(step_model)
# Create a histogram of the residuals
hist(residuals2, breaks = 30, main = "Histogram of Residuals OLS Model 1", xlab = "Residuals")

library(rpart)
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
# Calculate the median price, used as the cut-off for the binary target
median_price <- median(air_clean_filtered$price)

# Create a binary variable based on median price. Listings priced exactly at
# the median fall into "Below Median" because the comparison is strict.
air_clean_filtered$price_binary <- ifelse(air_clean_filtered$price > median_price, "Above Median", "Below Median")

# Convert character columns (including the new price_binary) to factors.
# across(where(...)) replaces the superseded mutate_if() scoped verb.
air_clean_filtered <- air_clean_filtered %>%
  mutate(across(where(is.character), as.factor))

# Set the seed for reproducibility
set.seed(123)

# Randomize the order of the data
air_clean_filtered <- air_clean_filtered[sample(nrow(air_clean_filtered)), ]

# Split the data into training (70%) and testing (30%) sets
train_index <- createDataPartition(air_clean_filtered$price, p = 0.7, list = FALSE)
train_data <- air_clean_filtered[train_index, ]
test_data <- air_clean_filtered[-train_index, ]

# Fit the decision tree model using the training data. The predictors are
# listed explicitly, so the raw `price` column is not used to predict
# `price_binary`.
tree_model <- rpart(price_binary ~  host_is_superhost + host_response_time + 
                      host_response_rate + host_acceptance_rate + 
                      accommodates + bathrooms + bedrooms + 
                      neighbourhood_cleansed + host_listings_count + 
                      minimum_nights + maximum_nights + instant_bookable + 
                      host_identity_verified + availability_30 + 
                      availability_60 + availability_90 + 
                      review_scores_rating + reviews_per_month , data = train_data)

# Visualize the decision tree
rpart.plot(tree_model)

# Convert character columns to factors in the dataset. This repeats the
# conversion already applied before the train/test split;
# across(where(...)) replaces the superseded mutate_if() scoped verb.
air_clean_filtered <- air_clean_filtered %>%
  mutate(across(where(is.character), as.factor))



# Get the variable importance from the model
variable_importance <- tree_model$variable.importance

# Calculate the total importance sum
total_importance <- sum(variable_importance)

# Express each variable's importance as a percentage of the total
percentage_importance <- (variable_importance / total_importance) * 100

# Sort variable importance in descending order
sorted_percentage <- sort(percentage_importance, decreasing = TRUE)

# Print sorted percentage importance
print(sorted_percentage)
##               bedrooms           accommodates              bathrooms 
##            25.35976185            19.72044362            17.29235380 
## neighbourhood_cleansed         minimum_nights      reviews_per_month 
##            15.47136343             6.80647157             4.97305337 
##   host_acceptance_rate        availability_30    host_listings_count 
##             2.24625502             2.01425397             1.87239199 
##     host_response_time        availability_60        availability_90 
##             1.84168917             0.99998840             0.64275407 
##         maximum_nights   review_scores_rating     host_response_rate 
##             0.51170019             0.20504798             0.04247158
# Create a subset of the dataset with the selected input variables and the
# binary price target. all_of() marks the names as an external character
# vector (passing a bare external vector to select() is deprecated since
# tidyselect 1.1.0) and raises an error if any listed column is missing.

data_subset <- air_clean_filtered %>%
  select(all_of(c(selected_variables, "price_binary")))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(selected_variables)
## 
##   # Now:
##   data %>% select(all_of(selected_variables))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 10-fold cross-validation settings for the caret fits
train_control <- trainControl(method = "cv", number = 10)

# Candidate neighbourhood sizes to evaluate: the odd values 1 through 13
k_values <- data.frame(k = seq(from = 1, to = 13, by = 2))

# Cross-validate a KNN classifier for every candidate k on the selected
# predictors
knn_model <- train(
  price_binary ~ .,
  data = data_subset,
  method = "knn",
  trControl = train_control,
  tuneGrid = k_values
)

# Cross-validated accuracy for each k
plot(
  knn_model,
  main = "Model Accuracy vs. Number of Neighbors (K)",
  xlab = "Number of Neighbors (K)",
  ylab = "Accuracy"
)

# Train the KNN model on the training split and evaluate it on the test split.
#
# `train_data` still contains the raw `price` column from which `price_binary`
# was derived, so fitting `price_binary ~ .` on the whole frame would let the
# model see its own answer (target leakage) and inflate every metric. The
# predictors are therefore restricted to `selected_variables`, the same inputs
# used for the cross-validated KNN fit on `data_subset`; `price` is dropped
# defensively in case it is listed there.
# NOTE(review): the metrics in the rendered output were produced by the leaky
# fit and need to be regenerated.
knn_columns <- unique(c(setdiff(selected_variables, "price"), "price_binary"))
knn_model <- train(price_binary ~ ., data = train_data[, knn_columns], method = "knn", trControl = train_control)

# predict() only uses the columns named in the fitted formula, so the full
# test frame can be passed as-is
knn_predictions <- predict(knn_model, test_data)

# Compute the confusion matrix
knn_metrics <- confusionMatrix(knn_predictions, test_data$price_binary)
print(knn_metrics)
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Above Median Below Median
##   Above Median          524           33
##   Below Median           36          535
##                                              
##                Accuracy : 0.9388             
##                  95% CI : (0.9232, 0.9521)   
##     No Information Rate : 0.5035             
##     P-Value [Acc > NIR] : <0.0000000000000002
##                                              
##                   Kappa : 0.8776             
##                                              
##  Mcnemar's Test P-Value : 0.8097             
##                                              
##             Sensitivity : 0.9357             
##             Specificity : 0.9419             
##          Pos Pred Value : 0.9408             
##          Neg Pred Value : 0.9370             
##              Prevalence : 0.4965             
##          Detection Rate : 0.4645             
##    Detection Prevalence : 0.4938             
##       Balanced Accuracy : 0.9388             
##                                              
##        'Positive' Class : Above Median       
## 
# Pull precision and recall out of the per-class statistics of the confusion
# matrix
knn_by_class <- knn_metrics$byClass
knn_precision <- knn_by_class["Precision"]
knn_recall <- knn_by_class["Recall"]

print("KNN Metrics:")
## [1] "KNN Metrics:"
knn_accuracy <- knn_metrics$overall["Accuracy"]
print(paste("Accuracy:", knn_accuracy))
## [1] "Accuracy: 0.938829787234043"
print(paste("Precision:", knn_precision))
## [1] "Precision: 0.940754039497307"
print(paste("Recall:", knn_recall))
## [1] "Recall: 0.935714285714286"
# Hypothetical single listing used to compare the models' price predictions.
# Every value below is an illustrative assumption.
new_listing <- data.frame(
  host_is_superhost      = "f",             # host is not a superhost
  host_response_time     = "within a day",  # host replies within a day
  host_response_rate     = 0.95,            # 95% response rate
  host_acceptance_rate   = 0.97,            # 97% acceptance rate
  accommodates           = 4,               # guests accommodated
  bathrooms              = 1,               # bathroom count
  bedrooms               = 2,               # bedroom count
  neighbourhood_cleansed = "Kitsilano",     # neighbourhood name
  host_listings_count    = 3,               # listings owned by the host
  minimum_nights         = 2,               # shortest allowed stay
  maximum_nights         = 30,              # longest allowed stay
  instant_bookable       = "f",             # instant booking disabled
  host_identity_verified = "t",             # host identity is verified
  availability_30        = 25,              # availability, next 30 days
  availability_60        = 50,              # availability, next 60 days
  availability_90        = 70,              # availability, next 90 days
  review_scores_rating   = 4.5,             # review scores rating
  reviews_per_month      = 2,               # reviews received per month
  has_availability       = "t",             # listing has availability
  availability_365       = 300,             # availability, next 365 days
  number_of_reviews      = 50               # total number of reviews
)

# Predict the price of `new_listing` with each fitted model and print the
# result under a label. predict() picks the columns each model needs from
# `new_listing`.
# NOTE(review): the labels skip "Model 4"; "Model 5" and "Model 6" are
# logstep_model1 and logstep_model2, which the comparison table labels
# "Log Model 1" and "Log Model 2". logstep_model1 returns the same prediction
# as step_model (212.4489), and its coefficients in the comparison tables
# match step_model's as well.
print("Predicted Price from Model 1:")
## [1] "Predicted Price from Model 1:"
print(predict(step_model, newdata = new_listing))
##        1 
## 212.4489
print("Predicted Price from Model 2:")
## [1] "Predicted Price from Model 2:"
print(predict(step_model2, newdata = new_listing))
##        1 
## 214.1977
print("Predicted Price from Model 3:")
## [1] "Predicted Price from Model 3:"
print(predict(step_model3, newdata = new_listing))
##        1 
## 221.2854
print("Predicted Price from Model 5:")
## [1] "Predicted Price from Model 5:"
print(predict(logstep_model1, newdata = new_listing))
##        1 
## 212.4489
print("Predicted Price from Model 6:")
## [1] "Predicted Price from Model 6:"
print(predict(logstep_model2, newdata = new_listing))
##        1 
## 225.0468

Distribution of price in different datasets

# Load the ggplot2 library
library(ggplot2)

# Draw a histogram of the `price` column of `data` in 25-unit bins, titled
# `plot_title`. Returns the ggplot object, which is printed automatically
# when the call sits at top level.
plot_price_histogram <- function(data, plot_title) {
  ggplot(data, aes(x = price)) +
    geom_histogram(binwidth = 25, color = "black", fill = "skyblue", alpha = 0.8) +
    labs(title = plot_title, x = "Price", y = "Frequency") +
    theme_minimal()
}

# Price distribution in the dataset behind Model 1
plot_price_histogram(air_clean, "Distribution of Price Model 1")

# Price distribution in the dataset behind Model 2
plot_price_histogram(air_clean_sig, "Distribution of Price Model 2")

# Price distribution in the dataset behind Model 3
plot_price_histogram(air_clean_filtered, "Distribution of Price Model 3")